diff --git a/.env b/.env index e3ececc2e54..ae266af80da 100644 --- a/.env +++ b/.env @@ -2,3 +2,4 @@ APP_IMAGE=gdcc/dataverse:unstable POSTGRES_VERSION=13 DATAVERSE_DB_USER=dataverse SOLR_VERSION=9.3.0 +SKIP_DEPLOY=0 \ No newline at end of file diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000000..9860024f70a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +# https://www.git-scm.com/docs/gitattributes + +# This sets mandatory LF line endings for .sh files, preventing Windows users from having to change the value of their git config --global core.autocrlf to 'false' or 'input' +*.sh text eol=lf \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index b297dfc4ee8..7e6995d76d9 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -3,7 +3,7 @@ name: Bug report about: Did you encounter something unexpected or incorrect in the Dataverse software? We'd like to hear about it! title: '' -labels: '' +labels: 'Type: Bug' assignees: '' --- diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 7d5e0deea05..d6248537418 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -2,7 +2,7 @@ name: Feature request about: Suggest an idea or new feature for the Dataverse software! title: 'Feature Request/Idea:' -labels: '' +labels: 'Type: Feature' assignees: '' --- diff --git a/.github/workflows/deploy_beta_testing.yml b/.github/workflows/deploy_beta_testing.yml index 2443ef8b2e0..028f0140cc9 100644 --- a/.github/workflows/deploy_beta_testing.yml +++ b/.github/workflows/deploy_beta_testing.yml @@ -21,6 +21,9 @@ jobs: working-directory: src/main/resources/META-INF run: echo -e "dataverse.feature.api-session-auth=true" >> microprofile-config.properties + - name: Set build number + run: scripts/installer/custom-build-number + - name: Build application war run: mvn package diff --git a/.github/workflows/maven_unit_test.yml b/.github/workflows/maven_unit_test.yml index efa3fa4a471..a70c55fc31d 100644 --- a/.github/workflows/maven_unit_test.yml +++ b/.github/workflows/maven_unit_test.yml @@ -1,4 +1,4 @@ -name: Maven Unit Tests +name: Maven Tests on: push: @@ -28,6 +28,7 @@ jobs: continue-on-error: ${{ matrix.experimental }} runs-on: ubuntu-latest steps: + # Basic setup chores - uses: actions/checkout@v3 - name: Set up JDK ${{ matrix.jdk }} uses: actions/setup-java@v3 with: java-version: ${{ matrix.jdk }} distribution: temurin cache: maven # The reason why we use "install" here is that we want the submodules to be available in the next step. - # Also, we can cache them this way for jobs triggered by this one. - - name: Build with Maven + # Also, we can cache them this way for jobs triggered by this one. We need to skip ITs here, as we run + # them in the next job - but install usually runs through the verify phase. + - name: Build with Maven and run unit tests run: > mvn -B -f modules/dataverse-parent -Dtarget.java.version=${{ matrix.jdk }} -DcompilerArgument=-Xlint:unchecked -P all-unit-tests + -DskipIntegrationTests -pl edu.harvard.iq:dataverse -am install - - name: Maven Code Coverage + # We don't want to cache the WAR file, so delete it + - run: rm -rf ~/.m2/repository/edu/harvard/iq/dataverse + + # Upload the built war file. For download, it will be wrapped in a ZIP by GitHub.
+ # See also https://github.com/actions/upload-artifact#zipped-artifact-downloads + - uses: actions/upload-artifact@v3 + with: + name: dataverse-java${{ matrix.jdk }}.war + path: target/dataverse*.war + retention-days: 7 + + # Store the build for the next step (integration test) to avoid recompilation and to transfer coverage reports + - run: | + tar -cvf java-builddir.tar target + tar -cvf java-m2-selection.tar ~/.m2/repository/io/gdcc/dataverse-* + - uses: actions/upload-artifact@v3 + with: + name: java-artifacts + path: | + java-builddir.tar + java-m2-selection.tar + retention-days: 3 + + integration-test: + runs-on: ubuntu-latest + needs: unittest + name: (${{ matrix.status}} / JDK ${{ matrix.jdk }}) Integration Tests + strategy: + fail-fast: false + matrix: + jdk: [ '17' ] + experimental: [ false ] + status: [ "Stable" ] + # + # JDK 17 builds disabled due to non-essential fails marking CI jobs as completely failed within + # Github Projects, PR lists etc. This was consensus on Slack #dv-tech. See issue #8094 + # (This is a limitation of how Github is currently handling these things.) + # + #include: + # - jdk: '17' + # experimental: true + # status: "Experimental" + continue-on-error: ${{ matrix.experimental }} + steps: + # Basic setup chores + - uses: actions/checkout@v3 + - name: Set up JDK ${{ matrix.jdk }} + uses: actions/setup-java@v3 + with: + java-version: ${{ matrix.jdk }} + distribution: temurin + cache: maven + + # Get the build output from the unit test job + - uses: actions/download-artifact@v3 + with: + name: java-artifacts + - run: | + tar -xvf java-builddir.tar + tar -xvf java-m2-selection.tar -C / + + # Run integration tests (but not unit tests again) + - run: mvn -DskipUnitTests -Dtarget.java.version=${{ matrix.jdk }} verify + + # Wrap up and send to coverage job + - run: tar -cvf java-reportdir.tar target/site + - uses: actions/upload-artifact@v3 + with: + name: java-reportdir + path: java-reportdir.tar + retention-days: 3 + + coverage-report: + runs-on: ubuntu-latest + needs: integration-test + name: Coverage Report Submission + steps: + # Basic setup chores + - uses: actions/checkout@v3 + - uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: temurin + cache: maven + + # Get the build output from the integration test job + - uses: actions/download-artifact@v3 + with: + name: java-reportdir + - run: tar -xvf java-reportdir.tar + + # Deposit Code Coverage + - name: Deposit Code Coverage env: CI_NAME: github COVERALLS_SECRET: ${{ secrets.GITHUB_TOKEN }} @@ -57,22 +152,14 @@ jobs: -DrepoToken=${COVERALLS_SECRET} -DpullRequest=${{ github.event.number }} jacoco:report coveralls:report - # We don't want to cache the WAR file, so delete it - - run: rm -rf ~/.m2/repository/edu/harvard/iq/dataverse + # NOTE: this may be extended with adding a report to the build output, leave a comment, send to Sonarcloud, ... - # Upload the built war file. For download, it will be wrapped in a ZIP by GitHub. 
- # See also https://github.com/actions/upload-artifact#zipped-artifact-downloads - - uses: actions/upload-artifact@v3 - with: - name: dataverse-java${{ matrix.jdk }}.war - path: target/dataverse*.war - retention-days: 7 push-app-img: name: Publish App Image permissions: contents: read packages: write pull-requests: write - needs: unittest + needs: integration-test uses: ./.github/workflows/container_app_push.yml secrets: inherit diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b2be8f531c4..1430ba951a6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -56,7 +56,7 @@ If you are interested in working on the main Dataverse code, great! Before you s Please read http://guides.dataverse.org/en/latest/developers/version-control.html to understand how we use the "git flow" model of development and how we will encourage you to create a GitHub issue (if it doesn't exist already) to associate with your pull request. That page also includes tips on making a pull request. -After making your pull request, your goal should be to help it advance through our kanban board at https://github.com/orgs/IQSS/projects/2 . If no one has moved your pull request to the code review column in a timely manner, please reach out. Note that once a pull request is created for an issue, we'll remove the issue from the board so that we only track one card (the pull request). +After making your pull request, your goal should be to help it advance through our kanban board at https://github.com/orgs/IQSS/projects/34 . If no one has moved your pull request to the code review column in a timely manner, please reach out. Note that once a pull request is created for an issue, we'll remove the issue from the board so that we only track one card (the pull request). Thanks for your contribution! @@ -64,4 +64,4 @@ Thanks for your contribution! [Community Call]: https://dataverse.org/community-calls [dataverse-dev Google Group]: https://groups.google.com/group/dataverse-dev [community contributors]: https://docs.google.com/spreadsheets/d/1o9DD-MQ0WkrYaEFTD5rF_NtyL8aUISgURsAXSL7Budk/edit?usp=sharing -[dev efforts]: https://github.com/orgs/IQSS/projects/2#column-5298405 +[dev efforts]: https://github.com/orgs/IQSS/projects/34/views/6 diff --git a/README.md b/README.md index d40e5f228f7..651d0352dec 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ Dataverse® =============== -Dataverse is an [open source][] software platform for sharing, finding, citing, and preserving research data (developed by the [Data Science and Products team](http://www.iq.harvard.edu/people/people/data-science-products) at the [Institute for Quantitative Social Science](http://iq.harvard.edu/) and the [Dataverse community][]). +Dataverse is an [open source][] software platform for sharing, finding, citing, and preserving research data (developed by the [Dataverse team](https://dataverse.org/about) at the [Institute for Quantitative Social Science](https://iq.harvard.edu/) and the [Dataverse community][]). -[dataverse.org][] is our home on the web and shows a map of Dataverse installations around the world, a list of [features][], [integrations][] that have been made possible through [REST APIs][], our development [roadmap][], and more. +[dataverse.org][] is our home on the web and shows a map of Dataverse installations around the world, a list of [features][], [integrations][] that have been made possible through [REST APIs][], our [project board][], our development [roadmap][], and more. 
We maintain a demo site at [demo.dataverse.org][] which you are welcome to use for testing and evaluating Dataverse. @@ -15,7 +15,7 @@ We love contributors! Please see our [Contributing Guide][] for ways you can hel Dataverse is a trademark of President and Fellows of Harvard College and is registered in the United States. -[![Dataverse Project logo](src/main/webapp/resources/images/dataverseproject_logo.jpg?raw=true "Dataverse Project")](http://dataverse.org) +[![Dataverse Project logo](src/main/webapp/resources/images/dataverseproject_logo.jpg "Dataverse Project")](http://dataverse.org) [![API Test Status](https://jenkins.dataverse.org/buildStatus/icon?job=IQSS-dataverse-develop&subject=API%20Test%20Status)](https://jenkins.dataverse.org/job/IQSS-dataverse-develop/) [![API Test Coverage](https://img.shields.io/jenkins/coverage/jacoco?jobUrl=https%3A%2F%2Fjenkins.dataverse.org%2Fjob%2FIQSS-dataverse-develop&label=API%20Test%20Coverage)](https://jenkins.dataverse.org/job/IQSS-dataverse-develop/ws/target/coverage-it/index.html) @@ -26,15 +26,16 @@ Dataverse is a trademark of President and Fellows of Harvard College and is regi [dataverse.org]: https://dataverse.org [demo.dataverse.org]: https://demo.dataverse.org [Dataverse community]: https://dataverse.org/developers -[Installation Guide]: http://guides.dataverse.org/en/latest/installation/index.html +[Installation Guide]: https://guides.dataverse.org/en/latest/installation/index.html [latest release]: https://github.com/IQSS/dataverse/releases [features]: https://dataverse.org/software-features +[project board]: https://github.com/orgs/IQSS/projects/34 [roadmap]: https://www.iq.harvard.edu/roadmap-dataverse-project [integrations]: https://dataverse.org/integrations -[REST APIs]: http://guides.dataverse.org/en/latest/api/index.html +[REST APIs]: https://guides.dataverse.org/en/latest/api/index.html [Contributing Guide]: CONTRIBUTING.md [mailing list]: https://groups.google.com/group/dataverse-community [community call]: https://dataverse.org/community-calls -[chat.dataverse.org]: http://chat.dataverse.org +[chat.dataverse.org]: https://chat.dataverse.org [Dataverse Community Meeting]: https://dataverse.org/events [open source]: LICENSE.md diff --git a/conf/keycloak/docker-compose.yml b/conf/keycloak/docker-compose.yml index 2776f6572df..12b2382bd3d 100644 --- a/conf/keycloak/docker-compose.yml +++ b/conf/keycloak/docker-compose.yml @@ -3,13 +3,15 @@ version: "3.9" services: keycloak: - image: 'jboss/keycloak:16.1.1' + image: 'quay.io/keycloak/keycloak:21.0' + command: + - "start-dev" + - "--import-realm" environment: - - KEYCLOAK_USER=kcadmin - - KEYCLOAK_PASSWORD=kcpassword - - KEYCLOAK_IMPORT=/tmp/oidc-realm.json + - KEYCLOAK_ADMIN=kcadmin + - KEYCLOAK_ADMIN_PASSWORD=kcpassword - KEYCLOAK_LOGLEVEL=DEBUG ports: - "8090:8080" volumes: - - './oidc-realm.json:/tmp/oidc-realm.json' + - './test-realm.json:/opt/keycloak/data/import/test-realm.json' diff --git a/conf/keycloak/oidc-keycloak-auth-provider.json b/conf/keycloak/oidc-keycloak-auth-provider.json index 7d09fe5f36e..7e01bd4c325 100644 --- a/conf/keycloak/oidc-keycloak-auth-provider.json +++ b/conf/keycloak/oidc-keycloak-auth-provider.json @@ -3,6 +3,6 @@ "factoryAlias": "oidc", "title": "OIDC-Keycloak", "subtitle": "OIDC-Keycloak", - "factoryData": "type: oidc | issuer: http://keycloak.mydomain.com:8090/realms/oidc-realm | clientId: oidc-client | clientSecret: ss6gE8mODCDfqesQaSG3gwUwZqZt547E", + "factoryData": "type: oidc | issuer: http://keycloak.mydomain.com:8090/realms/test | 
clientId: test | clientSecret: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8", "enabled": true } diff --git a/conf/keycloak/oidc-realm.json b/conf/keycloak/oidc-realm.json deleted file mode 100644 index 1b77f2b4384..00000000000 --- a/conf/keycloak/oidc-realm.json +++ /dev/null @@ -1,2108 +0,0 @@ -{ - "id": "oidc-realm", - "realm": "oidc-realm", - "notBefore": 0, - "defaultSignatureAlgorithm": "RS256", - "revokeRefreshToken": false, - "refreshTokenMaxReuse": 0, - "accessTokenLifespan": 300, - "accessTokenLifespanForImplicitFlow": 900, - "ssoSessionIdleTimeout": 1800, - "ssoSessionMaxLifespan": 36000, - "ssoSessionIdleTimeoutRememberMe": 0, - "ssoSessionMaxLifespanRememberMe": 0, - "offlineSessionIdleTimeout": 2592000, - "offlineSessionMaxLifespanEnabled": false, - "offlineSessionMaxLifespan": 5184000, - "clientSessionIdleTimeout": 0, - "clientSessionMaxLifespan": 0, - "clientOfflineSessionIdleTimeout": 0, - "clientOfflineSessionMaxLifespan": 0, - "accessCodeLifespan": 60, - "accessCodeLifespanUserAction": 300, - "accessCodeLifespanLogin": 1800, - "actionTokenGeneratedByAdminLifespan": 43200, - "actionTokenGeneratedByUserLifespan": 300, - "oauth2DeviceCodeLifespan": 600, - "oauth2DevicePollingInterval": 5, - "enabled": true, - "sslRequired": "external", - "registrationAllowed": false, - "registrationEmailAsUsername": false, - "rememberMe": false, - "verifyEmail": false, - "loginWithEmailAllowed": true, - "duplicateEmailsAllowed": false, - "resetPasswordAllowed": false, - "editUsernameAllowed": false, - "bruteForceProtected": false, - "permanentLockout": false, - "maxFailureWaitSeconds": 900, - "minimumQuickLoginWaitSeconds": 60, - "waitIncrementSeconds": 60, - "quickLoginCheckMilliSeconds": 1000, - "maxDeltaTimeSeconds": 43200, - "failureFactor": 30, - "roles": { - "realm": [ - { - "id": "13d76240-fcf8-4361-9dbf-de268717cfb2", - "name": "uma_authorization", - "description": "${role_uma_authorization}", - "composite": false, - "clientRole": false, - "containerId": "oidc-realm", - "attributes": {} - }, - { - "id": "88b414c4-3516-4486-8f8b-a811ed0e0ce5", - "name": "default-roles-oidc-realm", - "description": "${role_default-roles}", - "composite": true, - "composites": { - "realm": [ - "offline_access", - "uma_authorization" - ] - }, - "clientRole": false, - "containerId": "oidc-realm", - "attributes": {} - }, - { - "id": "b907fd4e-0e54-461c-9411-3f736eef7d2f", - "name": "offline_access", - "description": "${role_offline-access}", - "composite": false, - "clientRole": false, - "containerId": "oidc-realm", - "attributes": {} - } - ], - "client": { - "realm-management": [ - { - "id": "39342ea9-0b4e-4841-8996-433759e9297f", - "name": "create-client", - "description": "${role_create-client}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "f8680034-617d-45d3-9801-7bf0d704c549", - "name": "manage-users", - "description": "${role_manage-users}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "b08e4cc3-71e2-4395-b66b-fb1277b48b88", - "name": "manage-realm", - "description": "${role_manage-realm}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "c15dc407-d012-43af-9a21-a2923e1d7b74", - "name": "manage-events", - "description": "${role_manage-events}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", 
- "attributes": {} - }, - { - "id": "66c07cb7-42cd-4155-8485-6cc7bd37cba9", - "name": "view-realm", - "description": "${role_view-realm}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "0419515f-4ab8-43ca-ac69-e842195813c0", - "name": "view-events", - "description": "${role_view-events}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "aa553d5a-b2dc-4f81-979a-2af0a019fee0", - "name": "impersonation", - "description": "${role_impersonation}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "9567e1e9-b755-43a8-93ed-d5929391316f", - "name": "manage-clients", - "description": "${role_manage-clients}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "e3dab69f-7323-4aad-bf98-8b7697f36d57", - "name": "query-users", - "description": "${role_query-users}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "ee8a4855-d0d5-4261-bdba-b419d304a824", - "name": "query-groups", - "description": "${role_query-groups}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "4f251212-e922-4ac0-9cce-3ada607648d2", - "name": "view-identity-providers", - "description": "${role_view-identity-providers}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "34e1dc59-a975-424f-887b-52465e184a4b", - "name": "realm-admin", - "description": "${role_realm-admin}", - "composite": true, - "composites": { - "client": { - "realm-management": [ - "create-client", - "manage-users", - "manage-realm", - "manage-events", - "view-realm", - "view-events", - "impersonation", - "manage-clients", - "query-users", - "view-identity-providers", - "query-groups", - "view-clients", - "view-users", - "manage-authorization", - "manage-identity-providers", - "query-realms", - "query-clients", - "view-authorization" - ] - } - }, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "d35aca04-0182-40d3-96b8-1ce5cc118729", - "name": "view-clients", - "description": "${role_view-clients}", - "composite": true, - "composites": { - "client": { - "realm-management": [ - "query-clients" - ] - } - }, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "7d3b28d5-471a-4b2b-bc80-56d4ff80fd28", - "name": "view-users", - "description": "${role_view-users}", - "composite": true, - "composites": { - "client": { - "realm-management": [ - "query-users", - "query-groups" - ] - } - }, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "651059eb-fc1a-4f8d-9ced-ed28b0a2f965", - "name": "manage-authorization", - "description": "${role_manage-authorization}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "73f447e9-def8-4214-8516-56571f2c6f65", - "name": "manage-identity-providers", - "description": "${role_manage-identity-providers}", - "composite": false, - "clientRole": true, - 
"containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "1b5f7c39-885e-4246-8cf5-25769544fc3d", - "name": "query-realms", - "description": "${role_query-realms}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "350da4c1-69d4-4557-a9a8-8ba760db0225", - "name": "query-clients", - "description": "${role_query-clients}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - }, - { - "id": "43d51082-6922-4765-8022-529d91a4603f", - "name": "view-authorization", - "description": "${role_view-authorization}", - "composite": false, - "clientRole": true, - "containerId": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "attributes": {} - } - ], - "security-admin-console": [], - "admin-cli": [], - "account-console": [], - "broker": [], - "oidc-client": [], - "account": [ - { - "id": "a163535c-71de-4b2d-9530-26b25eeb1c1e", - "name": "delete-account", - "description": "${role_delete-account}", - "composite": false, - "clientRole": true, - "containerId": "aed2e103-ee29-4d5c-a34e-1b8c65b7d537", - "attributes": {} - }, - { - "id": "851c6a9f-bce7-4c70-be82-084c25d61b25", - "name": "manage-account", - "composite": false, - "clientRole": true, - "containerId": "aed2e103-ee29-4d5c-a34e-1b8c65b7d537", - "attributes": {} - } - ] - } - }, - "groups": [], - "defaultRole": { - "id": "88b414c4-3516-4486-8f8b-a811ed0e0ce5", - "name": "default-roles-oidc-realm", - "description": "${role_default-roles}", - "composite": true, - "clientRole": false, - "containerId": "oidc-realm" - }, - "requiredCredentials": [ - "password" - ], - "otpPolicyType": "totp", - "otpPolicyAlgorithm": "HmacSHA1", - "otpPolicyInitialCounter": 0, - "otpPolicyDigits": 6, - "otpPolicyLookAheadWindow": 1, - "otpPolicyPeriod": 30, - "otpSupportedApplications": [ - "FreeOTP", - "Google Authenticator" - ], - "webAuthnPolicyRpEntityName": "keycloak", - "webAuthnPolicySignatureAlgorithms": [ - "ES256" - ], - "webAuthnPolicyRpId": "", - "webAuthnPolicyAttestationConveyancePreference": "not specified", - "webAuthnPolicyAuthenticatorAttachment": "not specified", - "webAuthnPolicyRequireResidentKey": "not specified", - "webAuthnPolicyUserVerificationRequirement": "not specified", - "webAuthnPolicyCreateTimeout": 0, - "webAuthnPolicyAvoidSameAuthenticatorRegister": false, - "webAuthnPolicyAcceptableAaguids": [], - "webAuthnPolicyPasswordlessRpEntityName": "keycloak", - "webAuthnPolicyPasswordlessSignatureAlgorithms": [ - "ES256" - ], - "webAuthnPolicyPasswordlessRpId": "", - "webAuthnPolicyPasswordlessAttestationConveyancePreference": "not specified", - "webAuthnPolicyPasswordlessAuthenticatorAttachment": "not specified", - "webAuthnPolicyPasswordlessRequireResidentKey": "not specified", - "webAuthnPolicyPasswordlessUserVerificationRequirement": "not specified", - "webAuthnPolicyPasswordlessCreateTimeout": 0, - "webAuthnPolicyPasswordlessAvoidSameAuthenticatorRegister": false, - "webAuthnPolicyPasswordlessAcceptableAaguids": [], - "users": [ - { - "username": "kcuser", - "enabled": true, - "totp": false, - "emailVerified": true, - "firstName": "Test", - "lastName": "Test", - "email": "test@test.com", - "credentials": [ - { - "type": "password", - "value": "kcpassword" - } - ] - } - ], - "scopeMappings": [ - { - "clientScope": "offline_access", - "roles": [ - "offline_access" - ] - } - ], - "clientScopeMappings": { - "account": [ - { - "client": "account-console", - "roles": [ 
- "manage-account" - ] - } - ] - }, - "clients": [ - { - "id": "aed2e103-ee29-4d5c-a34e-1b8c65b7d537", - "clientId": "account", - "name": "${client_account}", - "rootUrl": "${authBaseUrl}", - "baseUrl": "/realms/oidc-realm/account/", - "surrogateAuthRequired": false, - "enabled": true, - "alwaysDisplayInConsole": false, - "clientAuthenticatorType": "client-secret", - "redirectUris": [ - "/realms/oidc-realm/account/*" - ], - "webOrigins": [], - "notBefore": 0, - "bearerOnly": false, - "consentRequired": false, - "standardFlowEnabled": true, - "implicitFlowEnabled": false, - "directAccessGrantsEnabled": false, - "serviceAccountsEnabled": false, - "publicClient": true, - "frontchannelLogout": false, - "protocol": "openid-connect", - "attributes": {}, - "authenticationFlowBindingOverrides": {}, - "fullScopeAllowed": false, - "nodeReRegistrationTimeout": 0, - "defaultClientScopes": [ - "web-origins", - "roles", - "profile", - "email" - ], - "optionalClientScopes": [ - "address", - "phone", - "offline_access", - "microprofile-jwt" - ] - }, - { - "id": "1e821c0e-f6b9-4324-9b23-e82b5431fb72", - "clientId": "account-console", - "name": "${client_account-console}", - "rootUrl": "${authBaseUrl}", - "baseUrl": "/realms/oidc-realm/account/", - "surrogateAuthRequired": false, - "enabled": true, - "alwaysDisplayInConsole": false, - "clientAuthenticatorType": "client-secret", - "redirectUris": [ - "/realms/oidc-realm/account/*" - ], - "webOrigins": [], - "notBefore": 0, - "bearerOnly": false, - "consentRequired": false, - "standardFlowEnabled": true, - "implicitFlowEnabled": false, - "directAccessGrantsEnabled": false, - "serviceAccountsEnabled": false, - "publicClient": true, - "frontchannelLogout": false, - "protocol": "openid-connect", - "attributes": { - "pkce.code.challenge.method": "S256" - }, - "authenticationFlowBindingOverrides": {}, - "fullScopeAllowed": false, - "nodeReRegistrationTimeout": 0, - "protocolMappers": [ - { - "id": "397616ab-4124-4a13-92b6-317423e818a3", - "name": "audience resolve", - "protocol": "openid-connect", - "protocolMapper": "oidc-audience-resolve-mapper", - "consentRequired": false, - "config": {} - } - ], - "defaultClientScopes": [ - "web-origins", - "roles", - "profile", - "email" - ], - "optionalClientScopes": [ - "address", - "phone", - "offline_access", - "microprofile-jwt" - ] - }, - { - "id": "dddcc3e0-d742-422b-8b5f-84a292ea9d66", - "clientId": "admin-cli", - "name": "${client_admin-cli}", - "surrogateAuthRequired": false, - "enabled": true, - "alwaysDisplayInConsole": false, - "clientAuthenticatorType": "client-secret", - "redirectUris": [], - "webOrigins": [], - "notBefore": 0, - "bearerOnly": false, - "consentRequired": false, - "standardFlowEnabled": false, - "implicitFlowEnabled": false, - "directAccessGrantsEnabled": true, - "serviceAccountsEnabled": false, - "publicClient": true, - "frontchannelLogout": false, - "protocol": "openid-connect", - "attributes": {}, - "authenticationFlowBindingOverrides": {}, - "fullScopeAllowed": false, - "nodeReRegistrationTimeout": 0, - "defaultClientScopes": [ - "web-origins", - "roles", - "profile", - "email" - ], - "optionalClientScopes": [ - "address", - "phone", - "offline_access", - "microprofile-jwt" - ] - }, - { - "id": "df6f6cd0-a046-492f-84ac-b4fe31909be4", - "clientId": "broker", - "name": "${client_broker}", - "surrogateAuthRequired": false, - "enabled": true, - "alwaysDisplayInConsole": false, - "clientAuthenticatorType": "client-secret", - "redirectUris": [], - "webOrigins": [], - "notBefore": 0, - 
"bearerOnly": true, - "consentRequired": false, - "standardFlowEnabled": true, - "implicitFlowEnabled": false, - "directAccessGrantsEnabled": false, - "serviceAccountsEnabled": false, - "publicClient": false, - "frontchannelLogout": false, - "protocol": "openid-connect", - "attributes": {}, - "authenticationFlowBindingOverrides": {}, - "fullScopeAllowed": false, - "nodeReRegistrationTimeout": 0, - "defaultClientScopes": [ - "web-origins", - "roles", - "profile", - "email" - ], - "optionalClientScopes": [ - "address", - "phone", - "offline_access", - "microprofile-jwt" - ] - }, - { - "id": "c0af31b9-21aa-4e70-baf3-8d68850c4081", - "clientId": "oidc-client", - "surrogateAuthRequired": false, - "enabled": true, - "alwaysDisplayInConsole": false, - "clientAuthenticatorType": "client-secret", - "secret": "ss6gE8mODCDfqesQaSG3gwUwZqZt547E", - "redirectUris": [ - "*" - ], - "webOrigins": [ - "+" - ], - "notBefore": 0, - "bearerOnly": false, - "consentRequired": false, - "standardFlowEnabled": true, - "implicitFlowEnabled": false, - "directAccessGrantsEnabled": false, - "serviceAccountsEnabled": false, - "publicClient": false, - "frontchannelLogout": false, - "protocol": "openid-connect", - "attributes": { - "saml.force.post.binding": "false", - "saml.multivalued.roles": "false", - "oauth2.device.authorization.grant.enabled": "false", - "use.jwks.url": "true", - "backchannel.logout.revoke.offline.tokens": "false", - "saml.server.signature.keyinfo.ext": "false", - "use.refresh.tokens": "true", - "jwt.credential.certificate": "MIICpTCCAY0CBgGE8V6o6TANBgkqhkiG9w0BAQsFADAWMRQwEgYDVQQDDAtvaWRjLWNsaWVudDAeFw0yMjEyMDgxMDUyMDNaFw0zMjEyMDgxMDUzNDNaMBYxFDASBgNVBAMMC29pZGMtY2xpZW50MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArUffTl+jXWzyY3T4VVtkiGyNnY+RgyAXUzz+dxT7wUQaYSiNPvmaxnio555pWjR403SRUjVxM8eJYgHK9s43qQWdheXBIHyLKaQfjVsTtSmHgFtPmjk+kweQs6fxUi5CNvtx4RTCaOK5wV8q5q1X7mb8cZ5+gLSx1f/pHtayFXMT75nV04aZKWgPztPz8w+QXUx9cuFY4OIiTdRbdyfr1oOiDtMbxxA22tggB/HSMVkSckT3LSPj7fJKJMPFYi/g1AXxGipX/q8XkmOBrvNePCpH0F/IZbC1vXEsDC6urfoijOdiZgPMobuADmWHPiw2zgCN8qa6QuLFaI+JduXT9QIDAQABMA0GCSqGSIb3DQEBCwUAA4IBAQCEOYRHkH8DnBucb+uN5c9U/fZY+mpglxzZvby7dGBXfVwLN+eP1kGcQPaFi+nshk7FgF4mR5/cmuAPZt+YBbgP0z37D49nB7S6sniwzfhCAAplOT4vmm+MjperTDsWFUGhQZJvN/jxqP2Xccw7N//ReYi7yOlmWhwGyqQyTi0ySbE3BY5eFvUKepekybYi/15XlyF8lwS2jH1MvnJAxAMNVpVUcP4wTnq/dOw5ybrVWF0mPnA8KVzTPuPE5nzZvZ3rkXQeEJTffIToR+T/DH/KTLXcNUtx4nG0ajJ0gM6iVAXGnKlI9Viq/M5Ese+52I6rQmxTsFMn57LNzKgMpWcE", - "oidc.ciba.grant.enabled": "false", - "use.jwks.string": "false", - "backchannel.logout.session.required": "false", - "client_credentials.use_refresh_token": "false", - "require.pushed.authorization.requests": "false", - "saml.client.signature": "false", - "id.token.as.detached.signature": "false", - "saml.assertion.signature": "false", - "saml.encrypt": "false", - "saml.server.signature": "false", - "exclude.session.state.from.auth.response": "false", - "saml.artifact.binding": "false", - "saml_force_name_id_format": "false", - "tls.client.certificate.bound.access.tokens": "false", - "saml.authnstatement": "false", - "display.on.consent.screen": "false", - "saml.onetimeuse.condition": "false" - }, - "authenticationFlowBindingOverrides": {}, - "fullScopeAllowed": true, - "nodeReRegistrationTimeout": -1, - "defaultClientScopes": [ - "web-origins", - "roles", - "profile", - "email" - ], - "optionalClientScopes": [ - "address", - "phone", - "offline_access", - "microprofile-jwt" - ] - }, - { - "id": "43ffb712-f233-48e2-ae79-d6993bac34a5", - "clientId": "realm-management", - "name": 
"${client_realm-management}", - "surrogateAuthRequired": false, - "enabled": true, - "alwaysDisplayInConsole": false, - "clientAuthenticatorType": "client-secret", - "redirectUris": [], - "webOrigins": [], - "notBefore": 0, - "bearerOnly": true, - "consentRequired": false, - "standardFlowEnabled": true, - "implicitFlowEnabled": false, - "directAccessGrantsEnabled": false, - "serviceAccountsEnabled": false, - "publicClient": false, - "frontchannelLogout": false, - "protocol": "openid-connect", - "attributes": {}, - "authenticationFlowBindingOverrides": {}, - "fullScopeAllowed": false, - "nodeReRegistrationTimeout": 0, - "defaultClientScopes": [ - "web-origins", - "roles", - "profile", - "email" - ], - "optionalClientScopes": [ - "address", - "phone", - "offline_access", - "microprofile-jwt" - ] - }, - { - "id": "3747f98f-efbb-49ef-8238-a349bf5ab409", - "clientId": "security-admin-console", - "name": "${client_security-admin-console}", - "rootUrl": "${authAdminUrl}", - "baseUrl": "/admin/oidc-realm/console/", - "surrogateAuthRequired": false, - "enabled": true, - "alwaysDisplayInConsole": false, - "clientAuthenticatorType": "client-secret", - "redirectUris": [ - "/admin/oidc-realm/console/*" - ], - "webOrigins": [ - "+" - ], - "notBefore": 0, - "bearerOnly": false, - "consentRequired": false, - "standardFlowEnabled": true, - "implicitFlowEnabled": false, - "directAccessGrantsEnabled": false, - "serviceAccountsEnabled": false, - "publicClient": true, - "frontchannelLogout": false, - "protocol": "openid-connect", - "attributes": { - "pkce.code.challenge.method": "S256" - }, - "authenticationFlowBindingOverrides": {}, - "fullScopeAllowed": false, - "nodeReRegistrationTimeout": 0, - "protocolMappers": [ - { - "id": "2fbdf6c9-ee69-4edc-b780-ec62aecfc519", - "name": "locale", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "locale", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "locale", - "jsonType.label": "String" - } - } - ], - "defaultClientScopes": [ - "web-origins", - "roles", - "profile", - "email" - ], - "optionalClientScopes": [ - "address", - "phone", - "offline_access", - "microprofile-jwt" - ] - } - ], - "clientScopes": [ - { - "id": "f76f507d-7d1c-495b-9504-47830b3834f1", - "name": "phone", - "description": "OpenID Connect built-in scope: phone", - "protocol": "openid-connect", - "attributes": { - "include.in.token.scope": "true", - "display.on.consent.screen": "true", - "consent.screen.text": "${phoneScopeConsentText}" - }, - "protocolMappers": [ - { - "id": "be849ec8-1747-4efb-bc00-beeaf44f11c8", - "name": "phone number verified", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "phoneNumberVerified", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "phone_number_verified", - "jsonType.label": "boolean" - } - }, - { - "id": "8e8600ec-4290-435d-b109-9f0547cb4a1d", - "name": "phone number", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "phoneNumber", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "phone_number", - "jsonType.label": "String" - } - } - ] - }, - { - "id": 
"54b87197-5309-4b2c-8ad9-f561a0fc178a", - "name": "role_list", - "description": "SAML role list", - "protocol": "saml", - "attributes": { - "consent.screen.text": "${samlRoleListScopeConsentText}", - "display.on.consent.screen": "true" - }, - "protocolMappers": [ - { - "id": "5fd831af-19a5-4a9c-b44f-2a806fae011c", - "name": "role list", - "protocol": "saml", - "protocolMapper": "saml-role-list-mapper", - "consentRequired": false, - "config": { - "single": "false", - "attribute.nameformat": "Basic", - "attribute.name": "Role" - } - } - ] - }, - { - "id": "2f85470d-8cb7-4f07-8602-47342d68af86", - "name": "web-origins", - "description": "OpenID Connect scope for add allowed web origins to the access token", - "protocol": "openid-connect", - "attributes": { - "include.in.token.scope": "false", - "display.on.consent.screen": "false", - "consent.screen.text": "" - }, - "protocolMappers": [ - { - "id": "c5d2aafc-f72d-4d7b-9d88-cd759f0e045e", - "name": "allowed web origins", - "protocol": "openid-connect", - "protocolMapper": "oidc-allowed-origins-mapper", - "consentRequired": false, - "config": {} - } - ] - }, - { - "id": "528face9-229a-4adf-98d8-68b1a22e880d", - "name": "microprofile-jwt", - "description": "Microprofile - JWT built-in scope", - "protocol": "openid-connect", - "attributes": { - "include.in.token.scope": "true", - "display.on.consent.screen": "false" - }, - "protocolMappers": [ - { - "id": "89240a7c-10f3-4e09-9d6b-41955b86c58d", - "name": "groups", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-realm-role-mapper", - "consentRequired": false, - "config": { - "multivalued": "true", - "userinfo.token.claim": "true", - "user.attribute": "foo", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "groups", - "jsonType.label": "String" - } - }, - { - "id": "15b6db72-4870-480e-a675-87f87df5f8a5", - "name": "upn", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-property-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "username", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "upn", - "jsonType.label": "String" - } - } - ] - }, - { - "id": "cdd11477-b02b-4886-bc6d-cf4b728ebc0e", - "name": "email", - "description": "OpenID Connect built-in scope: email", - "protocol": "openid-connect", - "attributes": { - "include.in.token.scope": "true", - "display.on.consent.screen": "true", - "consent.screen.text": "${emailScopeConsentText}" - }, - "protocolMappers": [ - { - "id": "627b9f4f-23d6-4480-adf4-264faf58de33", - "name": "email verified", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-property-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "emailVerified", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "email_verified", - "jsonType.label": "boolean" - } - }, - { - "id": "6a2adf2e-db2d-4ebe-8d48-f658f9b4a5ca", - "name": "email", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-property-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "email", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "email", - "jsonType.label": "String" - } - } - ] - }, - { - "id": "8f830142-b3f1-40f0-82e2-ceed68857a40", - "name": "roles", - "description": "OpenID Connect scope for add user roles to the access token", - "protocol": "openid-connect", - "attributes": { - 
"include.in.token.scope": "false", - "display.on.consent.screen": "true", - "consent.screen.text": "${rolesScopeConsentText}" - }, - "protocolMappers": [ - { - "id": "28a96dc6-c4dc-4aae-b316-28b56dccd077", - "name": "audience resolve", - "protocol": "openid-connect", - "protocolMapper": "oidc-audience-resolve-mapper", - "consentRequired": false, - "config": {} - }, - { - "id": "3e81050f-540e-4f3d-9abf-86406e484f76", - "name": "realm roles", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-realm-role-mapper", - "consentRequired": false, - "config": { - "user.attribute": "foo", - "access.token.claim": "true", - "claim.name": "realm_access.roles", - "jsonType.label": "String", - "multivalued": "true" - } - }, - { - "id": "13afa1f4-3fac-4c90-a9b4-e84e682f46e9", - "name": "client roles", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-client-role-mapper", - "consentRequired": false, - "config": { - "user.attribute": "foo", - "access.token.claim": "true", - "claim.name": "resource_access.${client_id}.roles", - "jsonType.label": "String", - "multivalued": "true" - } - } - ] - }, - { - "id": "3beac2fc-e947-408f-8422-ca9a1e66a258", - "name": "address", - "description": "OpenID Connect built-in scope: address", - "protocol": "openid-connect", - "attributes": { - "include.in.token.scope": "true", - "display.on.consent.screen": "true", - "consent.screen.text": "${addressScopeConsentText}" - }, - "protocolMappers": [ - { - "id": "12911891-db5c-4a35-80fa-555c5eda7e68", - "name": "address", - "protocol": "openid-connect", - "protocolMapper": "oidc-address-mapper", - "consentRequired": false, - "config": { - "user.attribute.formatted": "formatted", - "user.attribute.country": "country", - "user.attribute.postal_code": "postal_code", - "userinfo.token.claim": "true", - "user.attribute.street": "street", - "id.token.claim": "true", - "user.attribute.region": "region", - "access.token.claim": "true", - "user.attribute.locality": "locality" - } - } - ] - }, - { - "id": "8a29297a-e6f6-41ae-b25d-8a14236de535", - "name": "offline_access", - "description": "OpenID Connect built-in scope: offline_access", - "protocol": "openid-connect", - "attributes": { - "consent.screen.text": "${offlineAccessScopeConsentText}", - "display.on.consent.screen": "true" - } - }, - { - "id": "ce1622c5-701f-4e3e-9d2d-8dae0f07a295", - "name": "profile", - "description": "OpenID Connect built-in scope: profile", - "protocol": "openid-connect", - "attributes": { - "include.in.token.scope": "true", - "display.on.consent.screen": "true", - "consent.screen.text": "${profileScopeConsentText}" - }, - "protocolMappers": [ - { - "id": "98cc62b8-250a-4087-92da-bb0f0931e675", - "name": "full name", - "protocol": "openid-connect", - "protocolMapper": "oidc-full-name-mapper", - "consentRequired": false, - "config": { - "id.token.claim": "true", - "access.token.claim": "true", - "userinfo.token.claim": "true" - } - }, - { - "id": "b99c8c44-4cc9-4c87-a5a1-c14e64d472ae", - "name": "given name", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-property-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "firstName", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "given_name", - "jsonType.label": "String" - } - }, - { - "id": "903d5932-bdec-42bc-a53c-3cce93deaa1c", - "name": "zoneinfo", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - 
"userinfo.token.claim": "true", - "user.attribute": "zoneinfo", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "zoneinfo", - "jsonType.label": "String" - } - }, - { - "id": "ccbdc095-28f7-4769-8261-2e32c7b6fab0", - "name": "picture", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "picture", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "picture", - "jsonType.label": "String" - } - }, - { - "id": "22a4a38c-f755-44f3-b847-803c7fb3cef5", - "name": "birthdate", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "birthdate", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "birthdate", - "jsonType.label": "String" - } - }, - { - "id": "78726920-b4e2-4ed2-b9e0-df38a7f82376", - "name": "updated at", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "updatedAt", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "updated_at", - "jsonType.label": "String" - } - }, - { - "id": "c64c6eb8-5cbe-4092-bf2c-dd02b8c0e0e8", - "name": "family name", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-property-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "lastName", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "family_name", - "jsonType.label": "String" - } - }, - { - "id": "306784d8-8da1-48d8-92a3-dccfff83bcaf", - "name": "middle name", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "middleName", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "middle_name", - "jsonType.label": "String" - } - }, - { - "id": "0ff127fa-774e-43a8-a1fc-47ea3f307aa1", - "name": "website", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "website", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "website", - "jsonType.label": "String" - } - }, - { - "id": "8989c6f8-25c5-4d02-aa06-25b3b77fc227", - "name": "profile", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "profile", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "profile", - "jsonType.label": "String" - } - }, - { - "id": "3b67000c-9cbf-43ee-9e05-26f560871897", - "name": "gender", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "gender", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "gender", - "jsonType.label": "String" - } - }, - { - "id": "c28b04de-2770-423e-9b9a-b3321d7300e2", - "name": "nickname", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - 
"config": { - "userinfo.token.claim": "true", - "user.attribute": "nickname", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "nickname", - "jsonType.label": "String" - } - }, - { - "id": "fd791ed4-d4ab-4df9-81b4-c69a3134bcab", - "name": "username", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-property-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "username", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "preferred_username", - "jsonType.label": "String" - } - }, - { - "id": "c7378ce5-3673-47b2-9ebc-92c772bebf9f", - "name": "locale", - "protocol": "openid-connect", - "protocolMapper": "oidc-usermodel-attribute-mapper", - "consentRequired": false, - "config": { - "userinfo.token.claim": "true", - "user.attribute": "locale", - "id.token.claim": "true", - "access.token.claim": "true", - "claim.name": "locale", - "jsonType.label": "String" - } - } - ] - } - ], - "defaultDefaultClientScopes": [ - "web-origins", - "role_list", - "roles", - "email", - "profile" - ], - "defaultOptionalClientScopes": [ - "address", - "microprofile-jwt", - "offline_access", - "phone" - ], - "browserSecurityHeaders": { - "contentSecurityPolicyReportOnly": "", - "xContentTypeOptions": "nosniff", - "xRobotsTag": "none", - "xFrameOptions": "SAMEORIGIN", - "contentSecurityPolicy": "frame-src 'self'; frame-ancestors 'self'; object-src 'none';", - "xXSSProtection": "1; mode=block", - "strictTransportSecurity": "max-age=31536000; includeSubDomains" - }, - "smtpServer": {}, - "eventsEnabled": false, - "eventsListeners": [ - "jboss-logging" - ], - "enabledEventTypes": [], - "adminEventsEnabled": false, - "adminEventsDetailsEnabled": false, - "identityProviders": [], - "identityProviderMappers": [], - "components": { - "org.keycloak.services.clientregistration.policy.ClientRegistrationPolicy": [ - { - "id": "8e2d0c22-0627-4115-9f14-4225244333d9", - "name": "Trusted Hosts", - "providerId": "trusted-hosts", - "subType": "anonymous", - "subComponents": {}, - "config": { - "host-sending-registration-request-must-match": [ - "true" - ], - "client-uris-must-match": [ - "true" - ] - } - }, - { - "id": "45bdde87-a364-4d66-a12e-1a4fd42c85fb", - "name": "Full Scope Disabled", - "providerId": "scope", - "subType": "anonymous", - "subComponents": {}, - "config": {} - }, - { - "id": "7b7d3215-68d2-41db-bc0f-db0a45934a84", - "name": "Allowed Client Scopes", - "providerId": "allowed-client-templates", - "subType": "anonymous", - "subComponents": {}, - "config": { - "allow-default-scopes": [ - "true" - ] - } - }, - { - "id": "e067781a-6058-4f2b-9408-3390e9854cf8", - "name": "Consent Required", - "providerId": "consent-required", - "subType": "anonymous", - "subComponents": {}, - "config": {} - }, - { - "id": "296be954-8084-45c8-b6f3-94d53f7341f6", - "name": "Allowed Protocol Mapper Types", - "providerId": "allowed-protocol-mappers", - "subType": "anonymous", - "subComponents": {}, - "config": { - "allowed-protocol-mapper-types": [ - "saml-role-list-mapper", - "saml-user-property-mapper", - "oidc-usermodel-attribute-mapper", - "oidc-address-mapper", - "oidc-sha256-pairwise-sub-mapper", - "saml-user-attribute-mapper", - "oidc-usermodel-property-mapper", - "oidc-full-name-mapper" - ] - } - }, - { - "id": "b9a2a484-aee1-4633-aa37-a9ab2b74a239", - "name": "Allowed Client Scopes", - "providerId": "allowed-client-templates", - "subType": "authenticated", - "subComponents": {}, - "config": { - 
"allow-default-scopes": [ - "true" - ] - } - }, - { - "id": "016e4914-a32c-40fa-8aab-3eb25a411df5", - "name": "Max Clients Limit", - "providerId": "max-clients", - "subType": "anonymous", - "subComponents": {}, - "config": { - "max-clients": [ - "200" - ] - } - }, - { - "id": "a4fb2fa3-93b8-4497-8047-424f70f298c7", - "name": "Allowed Protocol Mapper Types", - "providerId": "allowed-protocol-mappers", - "subType": "authenticated", - "subComponents": {}, - "config": { - "allowed-protocol-mapper-types": [ - "oidc-sha256-pairwise-sub-mapper", - "oidc-full-name-mapper", - "saml-user-property-mapper", - "saml-role-list-mapper", - "oidc-usermodel-attribute-mapper", - "oidc-address-mapper", - "oidc-usermodel-property-mapper", - "saml-user-attribute-mapper" - ] - } - } - ], - "org.keycloak.keys.KeyProvider": [ - { - "id": "31b693fa-2b95-47a6-96a1-dfff868ca1df", - "name": "rsa-enc-generated", - "providerId": "rsa-enc-generated", - "subComponents": {}, - "config": { - "priority": [ - "100" - ], - "algorithm": [ - "RSA-OAEP" - ] - } - }, - { - "id": "f1e63d09-45a0-4382-8346-0408ee906649", - "name": "hmac-generated", - "providerId": "hmac-generated", - "subComponents": {}, - "config": { - "priority": [ - "100" - ], - "algorithm": [ - "HS256" - ] - } - }, - { - "id": "99084d92-06f5-4787-b932-a40b5377f3cb", - "name": "rsa-generated", - "providerId": "rsa-generated", - "subComponents": {}, - "config": { - "priority": [ - "100" - ] - } - }, - { - "id": "9887f1bf-b4f7-4646-9919-a9dbde13ce74", - "name": "aes-generated", - "providerId": "aes-generated", - "subComponents": {}, - "config": { - "priority": [ - "100" - ] - } - } - ] - }, - "internationalizationEnabled": false, - "supportedLocales": [], - "authenticationFlows": [ - { - "id": "a7f91199-178d-4399-8319-5063ffcc37b0", - "alias": "Account verification options", - "description": "Method with which to verity the existing account", - "providerId": "basic-flow", - "topLevel": false, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "idp-email-verification", - "authenticatorFlow": false, - "requirement": "ALTERNATIVE", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticatorFlow": true, - "requirement": "ALTERNATIVE", - "priority": 20, - "flowAlias": "Verify Existing Account by Re-authentication", - "userSetupAllowed": false, - "autheticatorFlow": true - } - ] - }, - { - "id": "602533e3-f7a1-4e25-9a12-f3080eeccec3", - "alias": "Authentication Options", - "description": "Authentication options.", - "providerId": "basic-flow", - "topLevel": false, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "basic-auth", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "basic-auth-otp", - "authenticatorFlow": false, - "requirement": "DISABLED", - "priority": 20, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "auth-spnego", - "authenticatorFlow": false, - "requirement": "DISABLED", - "priority": 30, - "userSetupAllowed": false, - "autheticatorFlow": false - } - ] - }, - { - "id": "ba7bcdfd-05c6-4da6-827b-24e3513bddbe", - "alias": "Browser - Conditional OTP", - "description": "Flow to determine if the OTP is required for the authentication", - "providerId": "basic-flow", - "topLevel": false, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "conditional-user-configured", - "authenticatorFlow": false, - 
"requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "auth-otp-form", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 20, - "userSetupAllowed": false, - "autheticatorFlow": false - } - ] - }, - { - "id": "d0f62327-ef2f-4561-8b5a-1f61faecdac0", - "alias": "Direct Grant - Conditional OTP", - "description": "Flow to determine if the OTP is required for the authentication", - "providerId": "basic-flow", - "topLevel": false, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "conditional-user-configured", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "direct-grant-validate-otp", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 20, - "userSetupAllowed": false, - "autheticatorFlow": false - } - ] - }, - { - "id": "f10b85d0-26ee-4648-b81b-80213b066d76", - "alias": "First broker login - Conditional OTP", - "description": "Flow to determine if the OTP is required for the authentication", - "providerId": "basic-flow", - "topLevel": false, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "conditional-user-configured", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "auth-otp-form", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 20, - "userSetupAllowed": false, - "autheticatorFlow": false - } - ] - }, - { - "id": "d6af4ac0-f6bc-4197-bf01-6e2c321ecaad", - "alias": "Handle Existing Account", - "description": "Handle what to do if there is existing account with same email/username like authenticated identity provider", - "providerId": "basic-flow", - "topLevel": false, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "idp-confirm-link", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticatorFlow": true, - "requirement": "REQUIRED", - "priority": 20, - "flowAlias": "Account verification options", - "userSetupAllowed": false, - "autheticatorFlow": true - } - ] - }, - { - "id": "501ab743-2e2f-427d-820f-14deed111b08", - "alias": "Reset - Conditional OTP", - "description": "Flow to determine if the OTP should be reset or not. 
Set to REQUIRED to force.", - "providerId": "basic-flow", - "topLevel": false, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "conditional-user-configured", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "reset-otp", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 20, - "userSetupAllowed": false, - "autheticatorFlow": false - } - ] - }, - { - "id": "e02c3a63-a09d-4dde-9f6c-22c95eef8534", - "alias": "User creation or linking", - "description": "Flow for the existing/non-existing user alternatives", - "providerId": "basic-flow", - "topLevel": false, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticatorConfig": "create unique user config", - "authenticator": "idp-create-user-if-unique", - "authenticatorFlow": false, - "requirement": "ALTERNATIVE", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticatorFlow": true, - "requirement": "ALTERNATIVE", - "priority": 20, - "flowAlias": "Handle Existing Account", - "userSetupAllowed": false, - "autheticatorFlow": true - } - ] - }, - { - "id": "c348906d-6266-4e68-937e-8f3d15c66524", - "alias": "Verify Existing Account by Re-authentication", - "description": "Reauthentication of existing account", - "providerId": "basic-flow", - "topLevel": false, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "idp-username-password-form", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticatorFlow": true, - "requirement": "CONDITIONAL", - "priority": 20, - "flowAlias": "First broker login - Conditional OTP", - "userSetupAllowed": false, - "autheticatorFlow": true - } - ] - }, - { - "id": "cf6ba166-43d5-4687-95c4-0a184ca08885", - "alias": "browser", - "description": "browser based authentication", - "providerId": "basic-flow", - "topLevel": true, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "auth-cookie", - "authenticatorFlow": false, - "requirement": "ALTERNATIVE", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "auth-spnego", - "authenticatorFlow": false, - "requirement": "DISABLED", - "priority": 20, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "identity-provider-redirector", - "authenticatorFlow": false, - "requirement": "ALTERNATIVE", - "priority": 25, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticatorFlow": true, - "requirement": "ALTERNATIVE", - "priority": 30, - "flowAlias": "forms", - "userSetupAllowed": false, - "autheticatorFlow": true - } - ] - }, - { - "id": "87cb4f25-9275-4617-9e95-63adf1ce3ece", - "alias": "clients", - "description": "Base authentication for clients", - "providerId": "client-flow", - "topLevel": true, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "client-secret", - "authenticatorFlow": false, - "requirement": "ALTERNATIVE", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "client-jwt", - "authenticatorFlow": false, - "requirement": "ALTERNATIVE", - "priority": 20, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "client-secret-jwt", - "authenticatorFlow": false, - "requirement": "ALTERNATIVE", - "priority": 30, - 
"userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "client-x509", - "authenticatorFlow": false, - "requirement": "ALTERNATIVE", - "priority": 40, - "userSetupAllowed": false, - "autheticatorFlow": false - } - ] - }, - { - "id": "e75b99c5-c566-4009-b0ba-c73716bed254", - "alias": "direct grant", - "description": "OpenID Connect Resource Owner Grant", - "providerId": "basic-flow", - "topLevel": true, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "direct-grant-validate-username", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "direct-grant-validate-password", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 20, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticatorFlow": true, - "requirement": "CONDITIONAL", - "priority": 30, - "flowAlias": "Direct Grant - Conditional OTP", - "userSetupAllowed": false, - "autheticatorFlow": true - } - ] - }, - { - "id": "8a97380c-0f70-45cb-a7b0-780eb70453ba", - "alias": "docker auth", - "description": "Used by Docker clients to authenticate against the IDP", - "providerId": "basic-flow", - "topLevel": true, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "docker-http-basic-authenticator", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - } - ] - }, - { - "id": "131e0aad-5422-4504-bafc-96be2fa44c34", - "alias": "first broker login", - "description": "Actions taken after first broker login with identity provider account, which is not yet linked to any Keycloak account", - "providerId": "basic-flow", - "topLevel": true, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticatorConfig": "review profile config", - "authenticator": "idp-review-profile", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticatorFlow": true, - "requirement": "REQUIRED", - "priority": 20, - "flowAlias": "User creation or linking", - "userSetupAllowed": false, - "autheticatorFlow": true - } - ] - }, - { - "id": "e7d4b793-b3c2-4ec3-a2b1-04f7217e8f46", - "alias": "forms", - "description": "Username, password, otp and other auth forms.", - "providerId": "basic-flow", - "topLevel": false, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "auth-username-password-form", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticatorFlow": true, - "requirement": "CONDITIONAL", - "priority": 20, - "flowAlias": "Browser - Conditional OTP", - "userSetupAllowed": false, - "autheticatorFlow": true - } - ] - }, - { - "id": "f59a7688-61a1-4ac9-a13a-03f92e022add", - "alias": "http challenge", - "description": "An authentication flow based on challenge-response HTTP Authentication Schemes", - "providerId": "basic-flow", - "topLevel": true, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "no-cookie-redirect", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticatorFlow": true, - "requirement": "REQUIRED", - "priority": 20, - "flowAlias": "Authentication Options", - "userSetupAllowed": false, - "autheticatorFlow": 
true - } - ] - }, - { - "id": "80a7b0f5-abb3-4780-be58-4ed1dc3e50fa", - "alias": "registration", - "description": "registration flow", - "providerId": "basic-flow", - "topLevel": true, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "registration-page-form", - "authenticatorFlow": true, - "requirement": "REQUIRED", - "priority": 10, - "flowAlias": "registration form", - "userSetupAllowed": false, - "autheticatorFlow": true - } - ] - }, - { - "id": "f18231cf-b803-493b-9dd6-ee8fa602c861", - "alias": "registration form", - "description": "registration form", - "providerId": "form-flow", - "topLevel": false, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "registration-user-creation", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 20, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "registration-profile-action", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 40, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "registration-password-action", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 50, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "registration-recaptcha-action", - "authenticatorFlow": false, - "requirement": "DISABLED", - "priority": 60, - "userSetupAllowed": false, - "autheticatorFlow": false - } - ] - }, - { - "id": "34ccfce6-1488-4db3-b90e-d98e8d8b2ae6", - "alias": "reset credentials", - "description": "Reset credentials for a user if they forgot their password or something", - "providerId": "basic-flow", - "topLevel": true, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "reset-credentials-choose-user", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "reset-credential-email", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 20, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticator": "reset-password", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 30, - "userSetupAllowed": false, - "autheticatorFlow": false - }, - { - "authenticatorFlow": true, - "requirement": "CONDITIONAL", - "priority": 40, - "flowAlias": "Reset - Conditional OTP", - "userSetupAllowed": false, - "autheticatorFlow": true - } - ] - }, - { - "id": "4468100c-fa83-4c16-8970-d53cb592f93a", - "alias": "saml ecp", - "description": "SAML ECP Profile Authentication Flow", - "providerId": "basic-flow", - "topLevel": true, - "builtIn": true, - "authenticationExecutions": [ - { - "authenticator": "http-basic-authenticator", - "authenticatorFlow": false, - "requirement": "REQUIRED", - "priority": 10, - "userSetupAllowed": false, - "autheticatorFlow": false - } - ] - } - ], - "authenticatorConfig": [ - { - "id": "c3bb087e-7fe9-4f13-b1bd-c2d7d1320054", - "alias": "create unique user config", - "config": { - "require.password.update.after.registration": "false" - } - }, - { - "id": "09820d9d-3c12-45f3-bc62-97b53f8a7efe", - "alias": "review profile config", - "config": { - "update.profile.on.first.login": "missing" - } - } - ], - "requiredActions": [ - { - "alias": "CONFIGURE_TOTP", - "name": "Configure OTP", - "providerId": "CONFIGURE_TOTP", - "enabled": true, - "defaultAction": false, - "priority": 10, - "config": {} - }, - { - "alias": "terms_and_conditions", - "name": "Terms 
and Conditions", - "providerId": "terms_and_conditions", - "enabled": false, - "defaultAction": false, - "priority": 20, - "config": {} - }, - { - "alias": "UPDATE_PASSWORD", - "name": "Update Password", - "providerId": "UPDATE_PASSWORD", - "enabled": true, - "defaultAction": false, - "priority": 30, - "config": {} - }, - { - "alias": "UPDATE_PROFILE", - "name": "Update Profile", - "providerId": "UPDATE_PROFILE", - "enabled": true, - "defaultAction": false, - "priority": 40, - "config": {} - }, - { - "alias": "VERIFY_EMAIL", - "name": "Verify Email", - "providerId": "VERIFY_EMAIL", - "enabled": true, - "defaultAction": false, - "priority": 50, - "config": {} - }, - { - "alias": "delete_account", - "name": "Delete Account", - "providerId": "delete_account", - "enabled": false, - "defaultAction": false, - "priority": 60, - "config": {} - }, - { - "alias": "update_user_locale", - "name": "Update User Locale", - "providerId": "update_user_locale", - "enabled": true, - "defaultAction": false, - "priority": 1000, - "config": {} - } - ], - "browserFlow": "browser", - "registrationFlow": "registration", - "directGrantFlow": "direct grant", - "resetCredentialsFlow": "reset credentials", - "clientAuthenticationFlow": "clients", - "dockerAuthenticationFlow": "docker auth", - "attributes": { - "cibaBackchannelTokenDeliveryMode": "poll", - "cibaExpiresIn": "120", - "cibaAuthRequestedUserHint": "login_hint", - "oauth2DeviceCodeLifespan": "600", - "clientOfflineSessionMaxLifespan": "0", - "oauth2DevicePollingInterval": "5", - "clientSessionIdleTimeout": "0", - "parRequestUriLifespan": "60", - "clientSessionMaxLifespan": "0", - "clientOfflineSessionIdleTimeout": "0", - "cibaInterval": "5" - }, - "keycloakVersion": "16.1.1", - "userManagedAccessAllowed": false, - "clientProfiles": { - "profiles": [] - }, - "clientPolicies": { - "policies": [] - } -} diff --git a/conf/keycloak/run-keycloak.sh b/conf/keycloak/run-keycloak.sh index effb37f91b8..ddc5108bee4 100755 --- a/conf/keycloak/run-keycloak.sh +++ b/conf/keycloak/run-keycloak.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -DOCKER_IMAGE="jboss/keycloak:16.1.1" +DOCKER_IMAGE="quay.io/keycloak/keycloak:21.0" KEYCLOAK_USER="kcadmin" KEYCLOAK_PASSWORD="kcpassword" KEYCLOAK_PORT=8090 @@ -11,7 +11,7 @@ if [ ! 
"$(docker ps -q -f name=^/keycloak$)" ]; then docker start keycloak echo "INFO - Keycloak container restarted" else - docker run -d --name keycloak -p $KEYCLOAK_PORT:8080 -e KEYCLOAK_USER=$KEYCLOAK_USER -e KEYCLOAK_PASSWORD=$KEYCLOAK_PASSWORD -e KEYCLOAK_IMPORT=/tmp/oidc-realm.json -v "$(pwd)"/oidc-realm.json:/tmp/oidc-realm.json $DOCKER_IMAGE + docker run -d --name keycloak -p $KEYCLOAK_PORT:8080 -e KEYCLOAK_USER=$KEYCLOAK_USER -e KEYCLOAK_PASSWORD=$KEYCLOAK_PASSWORD -e KEYCLOAK_IMPORT=/tmp/test-realm.json -v "$(pwd)"/test-realm.json:/tmp/test-realm.json $DOCKER_IMAGE echo "INFO - Keycloak container created and running" fi else diff --git a/conf/keycloak/test-realm.json b/conf/keycloak/test-realm.json new file mode 100644 index 00000000000..efe71cc5d29 --- /dev/null +++ b/conf/keycloak/test-realm.json @@ -0,0 +1,1939 @@ +{ + "id" : "80a7e04b-a2b5-4891-a2d1-5ad4e915f983", + "realm" : "test", + "displayName" : "", + "displayNameHtml" : "", + "notBefore" : 0, + "defaultSignatureAlgorithm" : "RS256", + "revokeRefreshToken" : false, + "refreshTokenMaxReuse" : 0, + "accessTokenLifespan" : 300, + "accessTokenLifespanForImplicitFlow" : 900, + "ssoSessionIdleTimeout" : 1800, + "ssoSessionMaxLifespan" : 36000, + "ssoSessionIdleTimeoutRememberMe" : 0, + "ssoSessionMaxLifespanRememberMe" : 0, + "offlineSessionIdleTimeout" : 2592000, + "offlineSessionMaxLifespanEnabled" : false, + "offlineSessionMaxLifespan" : 5184000, + "clientSessionIdleTimeout" : 0, + "clientSessionMaxLifespan" : 0, + "clientOfflineSessionIdleTimeout" : 0, + "clientOfflineSessionMaxLifespan" : 0, + "accessCodeLifespan" : 60, + "accessCodeLifespanUserAction" : 300, + "accessCodeLifespanLogin" : 1800, + "actionTokenGeneratedByAdminLifespan" : 43200, + "actionTokenGeneratedByUserLifespan" : 300, + "oauth2DeviceCodeLifespan" : 600, + "oauth2DevicePollingInterval" : 5, + "enabled" : true, + "sslRequired" : "none", + "registrationAllowed" : false, + "registrationEmailAsUsername" : false, + "rememberMe" : false, + "verifyEmail" : false, + "loginWithEmailAllowed" : true, + "duplicateEmailsAllowed" : false, + "resetPasswordAllowed" : false, + "editUsernameAllowed" : false, + "bruteForceProtected" : false, + "permanentLockout" : false, + "maxFailureWaitSeconds" : 900, + "minimumQuickLoginWaitSeconds" : 60, + "waitIncrementSeconds" : 60, + "quickLoginCheckMilliSeconds" : 1000, + "maxDeltaTimeSeconds" : 43200, + "failureFactor" : 30, + "roles" : { + "realm" : [ { + "id" : "075daee1-5ab2-44b5-adbf-fa49a3da8305", + "name" : "uma_authorization", + "description" : "${role_uma_authorization}", + "composite" : false, + "clientRole" : false, + "containerId" : "80a7e04b-a2b5-4891-a2d1-5ad4e915f983", + "attributes" : { } + }, { + "id" : "b4ff9091-ddf9-4536-b175-8cfa3e331d71", + "name" : "default-roles-test", + "description" : "${role_default-roles}", + "composite" : true, + "composites" : { + "realm" : [ "offline_access", "uma_authorization" ], + "client" : { + "account" : [ "view-profile", "manage-account" ] + } + }, + "clientRole" : false, + "containerId" : "80a7e04b-a2b5-4891-a2d1-5ad4e915f983", + "attributes" : { } + }, { + "id" : "e6d31555-6be6-4dee-bc6a-40a53108e4c2", + "name" : "offline_access", + "description" : "${role_offline-access}", + "composite" : false, + "clientRole" : false, + "containerId" : "80a7e04b-a2b5-4891-a2d1-5ad4e915f983", + "attributes" : { } + } ], + "client" : { + "realm-management" : [ { + "id" : "1955bd12-5f86-4a74-b130-d68a8ef6f0ee", + "name" : "impersonation", + "description" : "${role_impersonation}", + "composite" : 
false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "1109c350-9ab1-426c-9876-ef67d4310f35", + "name" : "view-authorization", + "description" : "${role_view-authorization}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "980c3fd3-1ae3-4b8f-9a00-d764c939035f", + "name" : "query-users", + "description" : "${role_query-users}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "5363e601-0f9d-4633-a8c8-28cb0f859b7b", + "name" : "query-groups", + "description" : "${role_query-groups}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "59aa7992-ad78-48db-868a-25d6e1d7db50", + "name" : "realm-admin", + "description" : "${role_realm-admin}", + "composite" : true, + "composites" : { + "client" : { + "realm-management" : [ "impersonation", "view-authorization", "query-users", "query-groups", "manage-clients", "manage-realm", "view-identity-providers", "query-realms", "manage-authorization", "manage-identity-providers", "manage-users", "view-users", "view-realm", "create-client", "view-clients", "manage-events", "query-clients", "view-events" ] + } + }, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "112f53c2-897d-4c01-81db-b8dc10c5b995", + "name" : "manage-clients", + "description" : "${role_manage-clients}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "c7f57bbd-ef32-4a64-9888-7b8abd90777a", + "name" : "manage-realm", + "description" : "${role_manage-realm}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "8885dac8-0af3-45af-94ce-eff5e801bb80", + "name" : "view-identity-providers", + "description" : "${role_view-identity-providers}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "2673346c-b0ef-4e01-8a90-be03866093af", + "name" : "manage-authorization", + "description" : "${role_manage-authorization}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "b7182885-9e57-445f-8dae-17c16eb31b5d", + "name" : "manage-identity-providers", + "description" : "${role_manage-identity-providers}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "ba7bfe0c-cb07-4a47-b92c-b8132b57e181", + "name" : "manage-users", + "description" : "${role_manage-users}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "13a8f0fc-647d-4bfe-b525-73956898e550", + "name" : "query-realms", + "description" : "${role_query-realms}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "ef4c57dc-78c2-4f9a-8d2b-0e97d46fc842", + "name" : "view-realm", + "description" : "${role_view-realm}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", 
+ "attributes" : { } + }, { + "id" : "2875da34-006c-4b7f-bfc8-9ae8e46af3a2", + "name" : "view-users", + "description" : "${role_view-users}", + "composite" : true, + "composites" : { + "client" : { + "realm-management" : [ "query-users", "query-groups" ] + } + }, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "c8c8f7dc-876b-4263-806f-3329f7cd5fd3", + "name" : "create-client", + "description" : "${role_create-client}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "21b84f90-5a9a-4845-a7ba-bbd98ac0fcc4", + "name" : "view-clients", + "description" : "${role_view-clients}", + "composite" : true, + "composites" : { + "client" : { + "realm-management" : [ "query-clients" ] + } + }, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "6fd64c94-d663-4501-ad77-0dcf8887d434", + "name" : "manage-events", + "description" : "${role_manage-events}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "b321927a-023c-4d2a-99ad-24baf7ff6d83", + "name" : "query-clients", + "description" : "${role_query-clients}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + }, { + "id" : "2fc21160-78de-457b-8594-e5c76cde1d5e", + "name" : "view-events", + "description" : "${role_view-events}", + "composite" : false, + "clientRole" : true, + "containerId" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "attributes" : { } + } ], + "test" : [ ], + "security-admin-console" : [ ], + "admin-cli" : [ ], + "account-console" : [ ], + "broker" : [ { + "id" : "07ee59b5-dca6-48fb-83d4-2994ef02850e", + "name" : "read-token", + "description" : "${role_read-token}", + "composite" : false, + "clientRole" : true, + "containerId" : "b57d62bb-77ff-42bd-b8ff-381c7288f327", + "attributes" : { } + } ], + "account" : [ { + "id" : "17d2f811-7bdf-4c73-83b4-1037001797b8", + "name" : "view-applications", + "description" : "${role_view-applications}", + "composite" : false, + "clientRole" : true, + "containerId" : "77f8127a-261e-4cd8-a77d-b74a389f7fd4", + "attributes" : { } + }, { + "id" : "d1ff44f9-419e-42fd-98e8-1add1169a972", + "name" : "delete-account", + "description" : "${role_delete-account}", + "composite" : false, + "clientRole" : true, + "containerId" : "77f8127a-261e-4cd8-a77d-b74a389f7fd4", + "attributes" : { } + }, { + "id" : "14c23a18-ae2d-43c9-b0c0-aaf6e0c7f5b0", + "name" : "manage-account-links", + "description" : "${role_manage-account-links}", + "composite" : false, + "clientRole" : true, + "containerId" : "77f8127a-261e-4cd8-a77d-b74a389f7fd4", + "attributes" : { } + }, { + "id" : "6fbe58af-d2fe-4d66-95fe-a2e8a818cb55", + "name" : "view-profile", + "description" : "${role_view-profile}", + "composite" : false, + "clientRole" : true, + "containerId" : "77f8127a-261e-4cd8-a77d-b74a389f7fd4", + "attributes" : { } + }, { + "id" : "bdfd02bc-6f6a-47d2-82bc-0ca52d78ff48", + "name" : "manage-consent", + "description" : "${role_manage-consent}", + "composite" : true, + "composites" : { + "client" : { + "account" : [ "view-consent" ] + } + }, + "clientRole" : true, + "containerId" : "77f8127a-261e-4cd8-a77d-b74a389f7fd4", + "attributes" : { } + }, { + "id" : "782f3b0c-a17b-4a87-988b-1a711401f3b0", + "name" : "manage-account", + "description" : 
"${role_manage-account}", + "composite" : true, + "composites" : { + "client" : { + "account" : [ "manage-account-links" ] + } + }, + "clientRole" : true, + "containerId" : "77f8127a-261e-4cd8-a77d-b74a389f7fd4", + "attributes" : { } + }, { + "id" : "8a3bfe15-66d9-4f3d-83ac-801d682d42b0", + "name" : "view-consent", + "description" : "${role_view-consent}", + "composite" : false, + "clientRole" : true, + "containerId" : "77f8127a-261e-4cd8-a77d-b74a389f7fd4", + "attributes" : { } + } ] + } + }, + "groups" : [ { + "id" : "d46f94c2-3b47-4288-b937-9cf918e54f0a", + "name" : "admins", + "path" : "/admins", + "attributes" : { }, + "realmRoles" : [ ], + "clientRoles" : { }, + "subGroups" : [ ] + }, { + "id" : "e992ce15-baac-48a0-8834-06f6fcf6c05b", + "name" : "curators", + "path" : "/curators", + "attributes" : { }, + "realmRoles" : [ ], + "clientRoles" : { }, + "subGroups" : [ ] + }, { + "id" : "531cf81d-a700-4336-808f-37a49709b48c", + "name" : "members", + "path" : "/members", + "attributes" : { }, + "realmRoles" : [ ], + "clientRoles" : { }, + "subGroups" : [ ] + } ], + "defaultRole" : { + "id" : "b4ff9091-ddf9-4536-b175-8cfa3e331d71", + "name" : "default-roles-test", + "description" : "${role_default-roles}", + "composite" : true, + "clientRole" : false, + "containerId" : "80a7e04b-a2b5-4891-a2d1-5ad4e915f983" + }, + "requiredCredentials" : [ "password" ], + "otpPolicyType" : "totp", + "otpPolicyAlgorithm" : "HmacSHA1", + "otpPolicyInitialCounter" : 0, + "otpPolicyDigits" : 6, + "otpPolicyLookAheadWindow" : 1, + "otpPolicyPeriod" : 30, + "otpSupportedApplications" : [ "FreeOTP", "Google Authenticator" ], + "webAuthnPolicyRpEntityName" : "keycloak", + "webAuthnPolicySignatureAlgorithms" : [ "ES256" ], + "webAuthnPolicyRpId" : "", + "webAuthnPolicyAttestationConveyancePreference" : "not specified", + "webAuthnPolicyAuthenticatorAttachment" : "not specified", + "webAuthnPolicyRequireResidentKey" : "not specified", + "webAuthnPolicyUserVerificationRequirement" : "not specified", + "webAuthnPolicyCreateTimeout" : 0, + "webAuthnPolicyAvoidSameAuthenticatorRegister" : false, + "webAuthnPolicyAcceptableAaguids" : [ ], + "webAuthnPolicyPasswordlessRpEntityName" : "keycloak", + "webAuthnPolicyPasswordlessSignatureAlgorithms" : [ "ES256" ], + "webAuthnPolicyPasswordlessRpId" : "", + "webAuthnPolicyPasswordlessAttestationConveyancePreference" : "not specified", + "webAuthnPolicyPasswordlessAuthenticatorAttachment" : "not specified", + "webAuthnPolicyPasswordlessRequireResidentKey" : "not specified", + "webAuthnPolicyPasswordlessUserVerificationRequirement" : "not specified", + "webAuthnPolicyPasswordlessCreateTimeout" : 0, + "webAuthnPolicyPasswordlessAvoidSameAuthenticatorRegister" : false, + "webAuthnPolicyPasswordlessAcceptableAaguids" : [ ], + "users" : [ { + "id" : "52cddd46-251c-4534-acc8-0580eeafb577", + "createdTimestamp" : 1684736014759, + "username" : "admin", + "enabled" : true, + "totp" : false, + "emailVerified" : true, + "firstName" : "Dataverse", + "lastName" : "Admin", + "email" : "dataverse-admin@mailinator.com", + "credentials" : [ { + "id" : "28f1ece7-26fb-40f1-9174-5ffce7b85c0a", + "type" : "password", + "userLabel" : "Set to \"admin\"", + "createdDate" : 1684736057302, + "secretData" : "{\"value\":\"ONI7fl6BmooVTUgwN1W3m7hsRjMAYEr2l+Fp5+7IOYw1iIntwvZ3U3W0ZBcCFJ7uhcKqF101+rueM3dZfoshPQ==\",\"salt\":\"Hj7co7zYVei7xwx8EaYP3A==\",\"additionalParameters\":{}}", + "credentialData" : "{\"hashIterations\":27500,\"algorithm\":\"pbkdf2-sha256\",\"additionalParameters\":{}}" + } ], + 
"disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "default-roles-test" ], + "notBefore" : 0, + "groups" : [ "/admins" ] + }, { + "id" : "a3d8e76d-7e7b-42dc-bbd7-4258818a8a1b", + "createdTimestamp" : 1684755806552, + "username" : "affiliate", + "enabled" : true, + "totp" : false, + "emailVerified" : true, + "firstName" : "Dataverse", + "lastName" : "Affiliate", + "email" : "dataverse-affiliate@mailinator.com", + "credentials" : [ { + "id" : "31c8eb1e-b2a8-4f86-833b-7c0536cd61a1", + "type" : "password", + "userLabel" : "My password", + "createdDate" : 1684755821743, + "secretData" : "{\"value\":\"T+RQ4nvmjknj7ds8NU7782j6PJ++uCu98zNoDQjIe9IKXah+13q4EcXO9IHmi2BJ7lgT0OIzwIoac4JEQLxhjQ==\",\"salt\":\"fnRmE9WmjAp4tlvGh/bxxQ==\",\"additionalParameters\":{}}", + "credentialData" : "{\"hashIterations\":27500,\"algorithm\":\"pbkdf2-sha256\",\"additionalParameters\":{}}" + } ], + "disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "default-roles-test" ], + "notBefore" : 0, + "groups" : [ ] + }, { + "id" : "e5531496-cfb8-498c-a902-50c98d649e79", + "createdTimestamp" : 1684755721064, + "username" : "curator", + "enabled" : true, + "totp" : false, + "emailVerified" : true, + "firstName" : "Dataverse", + "lastName" : "Curator", + "email" : "dataverse-curator@mailinator.com", + "credentials" : [ { + "id" : "664546b4-b936-45cf-a4cf-5e98b743fc7f", + "type" : "password", + "userLabel" : "My password", + "createdDate" : 1684755740776, + "secretData" : "{\"value\":\"AvVqybCNtCBVAdLEeJKresy9tc3c4BBUQvu5uHVQw4IjVagN6FpKGlDEKOrxhzdSM8skEvthOEqJkloPo1w+NQ==\",\"salt\":\"2em2DDRRlNEYsNR3xDqehw==\",\"additionalParameters\":{}}", + "credentialData" : "{\"hashIterations\":27500,\"algorithm\":\"pbkdf2-sha256\",\"additionalParameters\":{}}" + } ], + "disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "default-roles-test" ], + "notBefore" : 0, + "groups" : [ "/curators" ] + }, { + "id" : "c0082e7e-a3e9-45e6-95e9-811a34adce9d", + "createdTimestamp" : 1684755585802, + "username" : "user", + "enabled" : true, + "totp" : false, + "emailVerified" : true, + "firstName" : "Dataverse", + "lastName" : "User", + "email" : "dataverse-user@mailinator.com", + "credentials" : [ { + "id" : "00d6d67f-2e30-4da6-a567-bec38a1886a0", + "type" : "password", + "userLabel" : "My password", + "createdDate" : 1684755599597, + "secretData" : "{\"value\":\"z991rnjznAgosi5nX962HjM8/gN5GLJTdrlvi6G9cj8470X2/oZUb4Lka6s8xImgtEloCgWiKqH0EH9G4Y3a5A==\",\"salt\":\"/Uz7w+2IqDo+fQUGqxjVHw==\",\"additionalParameters\":{}}", + "credentialData" : "{\"hashIterations\":27500,\"algorithm\":\"pbkdf2-sha256\",\"additionalParameters\":{}}" + } ], + "disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "default-roles-test" ], + "notBefore" : 0, + "groups" : [ "/members" ] + } ], + "scopeMappings" : [ { + "clientScope" : "offline_access", + "roles" : [ "offline_access" ] + } ], + "clientScopeMappings" : { + "account" : [ { + "client" : "account-console", + "roles" : [ "manage-account" ] + } ] + }, + "clients" : [ { + "id" : "77f8127a-261e-4cd8-a77d-b74a389f7fd4", + "clientId" : "account", + "name" : "${client_account}", + "rootUrl" : "${authBaseUrl}", + "baseUrl" : "/realms/test/account/", + "surrogateAuthRequired" : false, + "enabled" : true, + "alwaysDisplayInConsole" : false, + "clientAuthenticatorType" : "client-secret", + "redirectUris" : [ "/realms/test/account/*" ], + "webOrigins" : [ ], + "notBefore" : 0, + "bearerOnly" : false, + 
"consentRequired" : false, + "standardFlowEnabled" : true, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : false, + "serviceAccountsEnabled" : false, + "publicClient" : true, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { + "post.logout.redirect.uris" : "+" + }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : false, + "nodeReRegistrationTimeout" : 0, + "defaultClientScopes" : [ "web-origins", "acr", "roles", "profile", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access", "microprofile-jwt" ] + }, { + "id" : "5d99f721-027c-478d-867d-61114e0a8192", + "clientId" : "account-console", + "name" : "${client_account-console}", + "rootUrl" : "${authBaseUrl}", + "baseUrl" : "/realms/test/account/", + "surrogateAuthRequired" : false, + "enabled" : true, + "alwaysDisplayInConsole" : false, + "clientAuthenticatorType" : "client-secret", + "redirectUris" : [ "/realms/test/account/*" ], + "webOrigins" : [ ], + "notBefore" : 0, + "bearerOnly" : false, + "consentRequired" : false, + "standardFlowEnabled" : true, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : false, + "serviceAccountsEnabled" : false, + "publicClient" : true, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { + "post.logout.redirect.uris" : "+", + "pkce.code.challenge.method" : "S256" + }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : false, + "nodeReRegistrationTimeout" : 0, + "protocolMappers" : [ { + "id" : "e181a0ce-9a04-4468-a38a-aaef9f78f989", + "name" : "audience resolve", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-audience-resolve-mapper", + "consentRequired" : false, + "config" : { } + } ], + "defaultClientScopes" : [ "web-origins", "acr", "roles", "profile", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access", "microprofile-jwt" ] + }, { + "id" : "5eccc178-121e-4d0f-bcb2-04ae3c2e52ed", + "clientId" : "admin-cli", + "name" : "${client_admin-cli}", + "surrogateAuthRequired" : false, + "enabled" : true, + "alwaysDisplayInConsole" : false, + "clientAuthenticatorType" : "client-secret", + "redirectUris" : [ ], + "webOrigins" : [ ], + "notBefore" : 0, + "bearerOnly" : false, + "consentRequired" : false, + "standardFlowEnabled" : false, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : true, + "serviceAccountsEnabled" : false, + "publicClient" : true, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : false, + "nodeReRegistrationTimeout" : 0, + "defaultClientScopes" : [ "web-origins", "acr", "roles", "profile", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access", "microprofile-jwt" ] + }, { + "id" : "b57d62bb-77ff-42bd-b8ff-381c7288f327", + "clientId" : "broker", + "name" : "${client_broker}", + "surrogateAuthRequired" : false, + "enabled" : true, + "alwaysDisplayInConsole" : false, + "clientAuthenticatorType" : "client-secret", + "redirectUris" : [ ], + "webOrigins" : [ ], + "notBefore" : 0, + "bearerOnly" : true, + "consentRequired" : false, + "standardFlowEnabled" : true, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : false, + "serviceAccountsEnabled" : false, + "publicClient" : false, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : false, + 
"nodeReRegistrationTimeout" : 0, + "defaultClientScopes" : [ "web-origins", "acr", "roles", "profile", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access", "microprofile-jwt" ] + }, { + "id" : "dada0ae8-ee9f-415a-9685-42da7c563660", + "clientId" : "realm-management", + "name" : "${client_realm-management}", + "surrogateAuthRequired" : false, + "enabled" : true, + "alwaysDisplayInConsole" : false, + "clientAuthenticatorType" : "client-secret", + "redirectUris" : [ ], + "webOrigins" : [ ], + "notBefore" : 0, + "bearerOnly" : true, + "consentRequired" : false, + "standardFlowEnabled" : true, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : false, + "serviceAccountsEnabled" : false, + "publicClient" : false, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : false, + "nodeReRegistrationTimeout" : 0, + "defaultClientScopes" : [ "web-origins", "acr", "roles", "profile", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access", "microprofile-jwt" ] + }, { + "id" : "bf7cf550-3875-4f97-9878-b2419a854058", + "clientId" : "security-admin-console", + "name" : "${client_security-admin-console}", + "rootUrl" : "${authAdminUrl}", + "baseUrl" : "/admin/test/console/", + "surrogateAuthRequired" : false, + "enabled" : true, + "alwaysDisplayInConsole" : false, + "clientAuthenticatorType" : "client-secret", + "redirectUris" : [ "/admin/test/console/*" ], + "webOrigins" : [ "+" ], + "notBefore" : 0, + "bearerOnly" : false, + "consentRequired" : false, + "standardFlowEnabled" : true, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : false, + "serviceAccountsEnabled" : false, + "publicClient" : true, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { + "post.logout.redirect.uris" : "+", + "pkce.code.challenge.method" : "S256" + }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : false, + "nodeReRegistrationTimeout" : 0, + "protocolMappers" : [ { + "id" : "ff845e16-e200-4894-ab51-37d8b9f2a445", + "name" : "locale", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "locale", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "locale", + "jsonType.label" : "String" + } + } ], + "defaultClientScopes" : [ "web-origins", "acr", "roles", "profile", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access", "microprofile-jwt" ] + }, { + "id" : "9c27faa8-4b8d-4ad9-9cd1-880032ef06aa", + "clientId" : "test", + "name" : "A Test Client", + "description" : "Use for hacking and testing away a confidential client", + "rootUrl" : "", + "adminUrl" : "", + "baseUrl" : "", + "surrogateAuthRequired" : false, + "enabled" : true, + "alwaysDisplayInConsole" : false, + "clientAuthenticatorType" : "client-secret", + "secret" : "94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8", + "redirectUris" : [ "*" ], + "webOrigins" : [ ], + "notBefore" : 0, + "bearerOnly" : false, + "consentRequired" : false, + "standardFlowEnabled" : true, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : true, + "serviceAccountsEnabled" : false, + "publicClient" : false, + "frontchannelLogout" : true, + "protocol" : "openid-connect", + "attributes" : { + "oidc.ciba.grant.enabled" : "false", + "client.secret.creation.time" : "1684735831", + 
"backchannel.logout.session.required" : "true", + "display.on.consent.screen" : "false", + "oauth2.device.authorization.grant.enabled" : "false", + "backchannel.logout.revoke.offline.tokens" : "false" + }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : true, + "nodeReRegistrationTimeout" : -1, + "defaultClientScopes" : [ "web-origins", "acr", "roles", "profile", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access", "microprofile-jwt" ] + } ], + "clientScopes" : [ { + "id" : "72f29e57-92fa-437b-828c-2b9d6fe56192", + "name" : "address", + "description" : "OpenID Connect built-in scope: address", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "true", + "display.on.consent.screen" : "true", + "consent.screen.text" : "${addressScopeConsentText}" + }, + "protocolMappers" : [ { + "id" : "59581aea-70d6-4ee8-bec2-1fea5fc497ae", + "name" : "address", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-address-mapper", + "consentRequired" : false, + "config" : { + "user.attribute.formatted" : "formatted", + "user.attribute.country" : "country", + "user.attribute.postal_code" : "postal_code", + "userinfo.token.claim" : "true", + "user.attribute.street" : "street", + "id.token.claim" : "true", + "user.attribute.region" : "region", + "access.token.claim" : "true", + "user.attribute.locality" : "locality" + } + } ] + }, { + "id" : "f515ec81-3c1b-4d4d-b7a2-e7e8d47b6447", + "name" : "roles", + "description" : "OpenID Connect scope for add user roles to the access token", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "false", + "display.on.consent.screen" : "true", + "consent.screen.text" : "${rolesScopeConsentText}" + }, + "protocolMappers" : [ { + "id" : "26d299a8-69e2-4864-9595-17a5b417fc61", + "name" : "realm roles", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-realm-role-mapper", + "consentRequired" : false, + "config" : { + "user.attribute" : "foo", + "access.token.claim" : "true", + "claim.name" : "realm_access.roles", + "jsonType.label" : "String", + "multivalued" : "true" + } + }, { + "id" : "d2998083-a8db-4f4e-9aaa-9cad68d65b97", + "name" : "audience resolve", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-audience-resolve-mapper", + "consentRequired" : false, + "config" : { } + }, { + "id" : "7a4cb2e5-07a0-4c16-a024-71df7ddd6868", + "name" : "client roles", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-client-role-mapper", + "consentRequired" : false, + "config" : { + "user.attribute" : "foo", + "access.token.claim" : "true", + "claim.name" : "resource_access.${client_id}.roles", + "jsonType.label" : "String", + "multivalued" : "true" + } + } ] + }, { + "id" : "8f1eafef-92d6-434e-b9ec-6edec1fddd0a", + "name" : "offline_access", + "description" : "OpenID Connect built-in scope: offline_access", + "protocol" : "openid-connect", + "attributes" : { + "consent.screen.text" : "${offlineAccessScopeConsentText}", + "display.on.consent.screen" : "true" + } + }, { + "id" : "c03095aa-b656-447a-9767-0763c2ccb070", + "name" : "acr", + "description" : "OpenID Connect scope for add acr (authentication context class reference) to the token", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "false", + "display.on.consent.screen" : "false" + }, + "protocolMappers" : [ { + "id" : "948b230c-56d0-4000-937c-841cd395d3f9", + "name" : "acr loa level", + "protocol" : "openid-connect", + "protocolMapper" : 
"oidc-acr-mapper", + "consentRequired" : false, + "config" : { + "id.token.claim" : "true", + "access.token.claim" : "true" + } + } ] + }, { + "id" : "cdf35f63-8ec7-41a0-ae12-f05d415818cc", + "name" : "phone", + "description" : "OpenID Connect built-in scope: phone", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "true", + "display.on.consent.screen" : "true", + "consent.screen.text" : "${phoneScopeConsentText}" + }, + "protocolMappers" : [ { + "id" : "ba4348ff-90b1-4e09-89a8-e5c08b04d3d1", + "name" : "phone number", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "phoneNumber", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "phone_number", + "jsonType.label" : "String" + } + }, { + "id" : "e6cceae5-8392-4348-b302-f610ece6056e", + "name" : "phone number verified", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "phoneNumberVerified", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "phone_number_verified", + "jsonType.label" : "boolean" + } + } ] + }, { + "id" : "4318001c-2970-41d3-91b9-e31c08569872", + "name" : "email", + "description" : "OpenID Connect built-in scope: email", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "true", + "display.on.consent.screen" : "true", + "consent.screen.text" : "${emailScopeConsentText}" + }, + "protocolMappers" : [ { + "id" : "406d02a6-866a-4962-8838-e8c58ada1505", + "name" : "email", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-property-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "email", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "email", + "jsonType.label" : "String" + } + }, { + "id" : "33baabc1-9bf2-42e4-8b8e-a53c13f0b744", + "name" : "email verified", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-property-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "emailVerified", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "email_verified", + "jsonType.label" : "boolean" + } + } ] + }, { + "id" : "5277a84f-d727-4c64-8432-d513127beee1", + "name" : "profile", + "description" : "OpenID Connect built-in scope: profile", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "true", + "display.on.consent.screen" : "true", + "consent.screen.text" : "${profileScopeConsentText}" + }, + "protocolMappers" : [ { + "id" : "0a609875-2678-4056-93ef-dd5c03e6059d", + "name" : "given name", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-property-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "firstName", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "given_name", + "jsonType.label" : "String" + } + }, { + "id" : "7c510d18-07ee-4b78-8acd-24b777d11b3c", + "name" : "website", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "website", + "id.token.claim" 
: "true", + "access.token.claim" : "true", + "claim.name" : "website", + "jsonType.label" : "String" + } + }, { + "id" : "0bb6d0ea-195f-49e8-918c-c419a26a661c", + "name" : "username", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-property-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "username", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "preferred_username", + "jsonType.label" : "String" + } + }, { + "id" : "5f1e644c-1acf-440c-b1a6-b5f65bcebfd9", + "name" : "profile", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "profile", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "profile", + "jsonType.label" : "String" + } + }, { + "id" : "c710bdb2-6cfd-4f60-9c4e-730188fc62f7", + "name" : "family name", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-property-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "lastName", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "family_name", + "jsonType.label" : "String" + } + }, { + "id" : "012d5038-0e13-42ba-9df7-2487c8e2eead", + "name" : "nickname", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "nickname", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "nickname", + "jsonType.label" : "String" + } + }, { + "id" : "21590b19-517d-4b6d-92f6-d4f71238677e", + "name" : "updated at", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "updatedAt", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "updated_at", + "jsonType.label" : "long" + } + }, { + "id" : "e4cddca7-1360-42f3-9854-da6cbe00c71e", + "name" : "birthdate", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "birthdate", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "birthdate", + "jsonType.label" : "String" + } + }, { + "id" : "afee328f-c64c-43e6-80d0-be2721c2ed0e", + "name" : "locale", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "locale", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "locale", + "jsonType.label" : "String" + } + }, { + "id" : "780a1e2c-5b63-46f4-a5bf-dc3fd8ce0cbb", + "name" : "full name", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-full-name-mapper", + "consentRequired" : false, + "config" : { + "id.token.claim" : "true", + "access.token.claim" : "true", + "userinfo.token.claim" : "true" + } + }, { + "id" : "aeebffff-f776-427e-83ed-064707ffce57", + "name" : "zoneinfo", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "zoneinfo", + "id.token.claim" : 
"true", + "access.token.claim" : "true", + "claim.name" : "zoneinfo", + "jsonType.label" : "String" + } + }, { + "id" : "b3e840a2-1794-4da1-bf69-31905cbff0d6", + "name" : "middle name", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "middleName", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "middle_name", + "jsonType.label" : "String" + } + }, { + "id" : "0607e0e4-4f7f-4214-996d-3599772ce1c7", + "name" : "picture", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "picture", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "picture", + "jsonType.label" : "String" + } + }, { + "id" : "426a609b-4e28-4132-af0d-13297b8cb63a", + "name" : "gender", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "gender", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "gender", + "jsonType.label" : "String" + } + } ] + }, { + "id" : "a1ebde82-ce21-438f-a3ad-261d3eeb1c01", + "name" : "role_list", + "description" : "SAML role list", + "protocol" : "saml", + "attributes" : { + "consent.screen.text" : "${samlRoleListScopeConsentText}", + "display.on.consent.screen" : "true" + }, + "protocolMappers" : [ { + "id" : "64653ac7-7ffc-4f7c-a589-03e3b68bbd25", + "name" : "role list", + "protocol" : "saml", + "protocolMapper" : "saml-role-list-mapper", + "consentRequired" : false, + "config" : { + "single" : "false", + "attribute.nameformat" : "Basic", + "attribute.name" : "Role" + } + } ] + }, { + "id" : "aeb5b852-dfec-4e67-9d9e-104abe9b3bf2", + "name" : "web-origins", + "description" : "OpenID Connect scope for add allowed web origins to the access token", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "false", + "display.on.consent.screen" : "false", + "consent.screen.text" : "" + }, + "protocolMappers" : [ { + "id" : "e2fa8437-a0f1-46fc-af9c-c40fc09cd6a1", + "name" : "allowed web origins", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-allowed-origins-mapper", + "consentRequired" : false, + "config" : { } + } ] + }, { + "id" : "4fecd0d7-d4ad-457e-90f2-c7202bf01ff5", + "name" : "microprofile-jwt", + "description" : "Microprofile - JWT built-in scope", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "true", + "display.on.consent.screen" : "false" + }, + "protocolMappers" : [ { + "id" : "a9536634-a9f6-4ed5-a8e7-8379d3b002ca", + "name" : "upn", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-property-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "username", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "upn", + "jsonType.label" : "String" + } + }, { + "id" : "2ce1a702-9458-4926-9b8a-f82c07215755", + "name" : "groups", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-realm-role-mapper", + "consentRequired" : false, + "config" : { + "multivalued" : "true", + "user.attribute" : "foo", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "groups", + "jsonType.label" : "String" + } 
+ } ] + } ], + "defaultDefaultClientScopes" : [ "role_list", "profile", "email", "roles", "web-origins", "acr" ], + "defaultOptionalClientScopes" : [ "offline_access", "address", "phone", "microprofile-jwt" ], + "browserSecurityHeaders" : { + "contentSecurityPolicyReportOnly" : "", + "xContentTypeOptions" : "nosniff", + "xRobotsTag" : "none", + "xFrameOptions" : "SAMEORIGIN", + "contentSecurityPolicy" : "frame-src 'self'; frame-ancestors 'self'; object-src 'none';", + "xXSSProtection" : "1; mode=block", + "strictTransportSecurity" : "max-age=31536000; includeSubDomains" + }, + "smtpServer" : { }, + "eventsEnabled" : false, + "eventsListeners" : [ "jboss-logging" ], + "enabledEventTypes" : [ ], + "adminEventsEnabled" : false, + "adminEventsDetailsEnabled" : false, + "identityProviders" : [ ], + "identityProviderMappers" : [ ], + "components" : { + "org.keycloak.services.clientregistration.policy.ClientRegistrationPolicy" : [ { + "id" : "8115796f-8f1f-4d6a-88f8-ca2938451260", + "name" : "Allowed Client Scopes", + "providerId" : "allowed-client-templates", + "subType" : "authenticated", + "subComponents" : { }, + "config" : { + "allow-default-scopes" : [ "true" ] + } + }, { + "id" : "044bd055-714d-478e-aa93-303d2161c427", + "name" : "Allowed Protocol Mapper Types", + "providerId" : "allowed-protocol-mappers", + "subType" : "authenticated", + "subComponents" : { }, + "config" : { + "allowed-protocol-mapper-types" : [ "saml-user-property-mapper", "oidc-address-mapper", "oidc-sha256-pairwise-sub-mapper", "saml-role-list-mapper", "saml-user-attribute-mapper", "oidc-usermodel-property-mapper", "oidc-usermodel-attribute-mapper", "oidc-full-name-mapper" ] + } + }, { + "id" : "be465734-3b0f-4370-a144-73db756e23f8", + "name" : "Allowed Protocol Mapper Types", + "providerId" : "allowed-protocol-mappers", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { + "allowed-protocol-mapper-types" : [ "oidc-usermodel-attribute-mapper", "saml-user-property-mapper", "oidc-address-mapper", "oidc-sha256-pairwise-sub-mapper", "saml-user-attribute-mapper", "oidc-full-name-mapper", "oidc-usermodel-property-mapper", "saml-role-list-mapper" ] + } + }, { + "id" : "42a2f64d-ac9e-4221-9cf6-40ff8c868629", + "name" : "Trusted Hosts", + "providerId" : "trusted-hosts", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { + "host-sending-registration-request-must-match" : [ "true" ], + "client-uris-must-match" : [ "true" ] + } + }, { + "id" : "7ca08915-6c33-454c-88f2-20e1d6553b26", + "name" : "Max Clients Limit", + "providerId" : "max-clients", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { + "max-clients" : [ "200" ] + } + }, { + "id" : "f01f2b6f-3f01-4d01-b2f4-70577c6f599c", + "name" : "Allowed Client Scopes", + "providerId" : "allowed-client-templates", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { + "allow-default-scopes" : [ "true" ] + } + }, { + "id" : "516d7f21-f21a-4690-831e-36ad313093b2", + "name" : "Consent Required", + "providerId" : "consent-required", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { } + }, { + "id" : "c79df6a0-d4d8-4866-b9e6-8ddb5d1bd38e", + "name" : "Full Scope Disabled", + "providerId" : "scope", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { } + } ], + "org.keycloak.userprofile.UserProfileProvider" : [ { + "id" : "cf47a21f-c8fb-42f2-9bff-feca967db183", + "providerId" : "declarative-user-profile", + "subComponents" : { }, + "config" : { } + } ], + "org.keycloak.keys.KeyProvider" : [ { + 
"id" : "6b4a2281-a9e8-43ab-aee7-190ae91b2842", + "name" : "aes-generated", + "providerId" : "aes-generated", + "subComponents" : { }, + "config" : { + "kid" : [ "47b9c2c2-32dc-4317-bd8b-1c4e5bb740ca" ], + "secret" : [ "9VWsVSqbj5zWa8Mq-rRzOw" ], + "priority" : [ "100" ] + } + }, { + "id" : "68e2d2b0-4976-480f-ab76-f84a17686b05", + "name" : "rsa-enc-generated", + "providerId" : "rsa-enc-generated", + "subComponents" : { }, + "config" : { + "privateKey" : [ "MIIEpQIBAAKCAQEAwuIcVVJDncorsQcFef4M/J9dsaNNmwEv/+4pCSZuco7IlA9uCfvwjYgfwQlWoCHCc7JFEtUOXhpLNR0SJ9w2eCC9A/0horjLmiVGU5sGACGrAxSgipt399k83mtkPBTikT1BXumPrX51ovdEPVPQSO0hIBwFn4ZDwA9P/00jNzzswyLC2UDdQrwIjm2xWjq1X82d8mL3+Yp8lF9qD1w305+XPiqCC+TUunKsuCQq5sddet+UoCDsFQyxsJi6cWJrryDvQmiDgM2wm68jn6hyzDE76J1az0wKEGqoMEwIy0juqZCyAqgsm3xA+zHpTcI3EyTwDGpMvWNJp8AWqXPNaQIDAQABAoIBAAethL1+n/6WpUBEaoHcVrq5/2+vo0+dfTyVZNKRFqtG0WOWPzOflFd1HZV7YVPuJI+uPi8ANmsnbh9YcaYg9JiTZ0hMZ++giBf0ID2hZxv995NyXnf7fkoFKghevYG+9mVPtHRmxKlKiPFWfHQjP1ACNKAD2UZdcdbzxicaIkPV/hP996mZA3xaaudggAJq7u/W67H2Q6ofGqW4TI5241d8T+6yobbvXRe4n8FKz4eK2aZv+N+zwh5JDMsJ8050+lCDsyoyakEPf+4veuPkewx4FemAiotDNcmoUQSDL26wLw8kk1uZ9JY0M88OL5pMyBuxTqy0F6BWBltq80mlefECgYEA4vZ8Agu2plXOzWASn0dyhCel3QoeUqNY8D8A+0vK9qWxUE9jMG13jAZmsL2I38SuwRN1DhJezbrn4QTuxTukxgSjLDv/pBp9UnXnCz/fg4yPTYsZ0zHqTMbwvdtfIzBHTCYyIJ+unxVYoenC0XZKSQXA3NN2zNqYpLhjStWdEZECgYEA29DznJxpDZsRUieRxFgZ+eRCjbQ9Q2A46preqMo1KOZ6bt9avxG3uM7pUC+UOeIizeRzxPSJ2SyptYPzdaNwKN3Lq+RhjHe1zYLngXb0CIQaRwNHqePxXF1sg0dTbmcxf+Co7yPG+Nd5nrQq9SQHC3tLTyL6x3VU/yAfMQqUklkCgYEAyVl8iGAV6RkE/4R04OOEv6Ng7WkVn6CUvYZXe5kw9YHnfWUAjS0AOrRPFAsBy+r0UgvN8+7uNjvTjPhQT5/rPVVN4WdVEyQA/E/m6j7/LvhbBaMbBRcqUnTHjNd6XoBtMCxOmkyvoShR2krE8AiuPHwjLoVXxsNDWhbO18wMrVECgYEAlmkICOXNzI2K8Jg62gse2yshjy0BrpSs3XtTWFPkxDPRGwSiZ5OMD10lsMSdvG3MOu5TeTWLDZvOFHJRqPFI0e3Sa7A+P4u6TwF/v8rRePJLuMO5ybo7cWRL2Bh6MlVSPZpQfjIQ+D0Y70uBCXS5jVW0VlYtG0Zh/qDQNxJyTyECgYEAuRINlZ0ag+1QTITapSatbFWd/KquGLpMjZyF4k5gVHs+4zHnnTi1YIDUInp1FJBqKD27z2byy7KFgbMBZQmsDs8i4fgzQrJHe3D4WFFHCjiClbeReejbas9bOnqhSQCiIy1Ck8vMAriAtctSA/g/qq6dQApSgcWaKvTVL2Ywa7E=" ], + "keyUse" : [ "ENC" ], + "certificate" : [ "MIIClzCCAX8CBgGIQhOIijANBgkqhkiG9w0BAQsFADAPMQ0wCwYDVQQDDAR0ZXN0MB4XDTIzMDUyMjA2MDczNloXDTMzMDUyMjA2MDkxNlowDzENMAsGA1UEAwwEdGVzdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMLiHFVSQ53KK7EHBXn+DPyfXbGjTZsBL//uKQkmbnKOyJQPbgn78I2IH8EJVqAhwnOyRRLVDl4aSzUdEifcNnggvQP9IaK4y5olRlObBgAhqwMUoIqbd/fZPN5rZDwU4pE9QV7pj61+daL3RD1T0EjtISAcBZ+GQ8APT/9NIzc87MMiwtlA3UK8CI5tsVo6tV/NnfJi9/mKfJRfag9cN9Oflz4qggvk1LpyrLgkKubHXXrflKAg7BUMsbCYunFia68g70Jog4DNsJuvI5+ocswxO+idWs9MChBqqDBMCMtI7qmQsgKoLJt8QPsx6U3CNxMk8AxqTL1jSafAFqlzzWkCAwEAATANBgkqhkiG9w0BAQsFAAOCAQEAIEIfjqOr2m+8s2RR8VW/nBgOgu9HtPRda4qNhGbgBkZ8NDy7TwHqlHo1ujKW5RO438pRyLJmOibWN4a/rkUsSjin6vgy4l8KpQy+7a4cQCQHyl34TmPjbtiw1jKgiOjzRQY54NVwIJNMIMc1ZyQo4u0U30/FxgUv6akXfS5O1ePD+5xKOOC/Af9AletjhQMPwVxXDwFqfQf/p+SM4Pyn4L633MESfDrH8v9FjJd0lV5ZlEI4hpPtnbi9U+CInqCy3VDNlZjsXswaDRujjg3LERfOMvCgj+Dck3FzWG7EiCwXWNEPvdMzv4w7M6KXuiPPQkST8DUWjgkjUCeLBzT3yw==" ], + "priority" : [ "100" ], + "algorithm" : [ "RSA-OAEP" ] + } + }, { + "id" : "728769a3-99a4-4cca-959d-28181dfee7e8", + "name" : "rsa-generated", + "providerId" : "rsa-generated", + "subComponents" : { }, + "config" : { + "privateKey" : [ 
"MIIEowIBAAKCAQEAxIszQCv8bX3sKXJVtuLJV6cH/uhkzxcTEIcDe7y2Y2SFM0x2nF6wRLk8QkvIrRmelilegUIJttqZxLXMpxwUJGizehHQMrOCzNoGBZdVanoK7nNa5+FOYtlvL4GxNfwzS36sp3PnKQiGv5Q7RGuPthjLFfqTmYx/7GTDJC4vLEW5S01Vy/Xc9FE4FsT0hnm91lRWjppc9893M5QUy/TPu8udIuNV87Ko5yiIxQqcPiAQXJaN4CyGaDcYhhzzHdxVptIk2FvtxhpmNxrbtmBCx/o9/rBDQNTis8Ex6ItWC2PvC17UPvyOcZ4Fv/qO0L6JZ0mrpH95CeDU1kEP+KKZrwIDAQABAoIBAGGl6SYiVG1PyTQEXqqY/UCjt3jBnEg5ZhrpgWUKKrGyAO2uOSXSc5AJWfN0NHUwC9b+IbplhW8IJ6qQSmfiLu2x6S2mSQLPphZB4gkIGYNntCOpQ0p+aZP6BGAddt5j+VYyTvR5RKlh15S6QEHrkMB/i/LVBl0c7XeUzlEc8wnyj8DGvlmpcQzIcbWfqEZ/FciDdKGNN0M4V/r1uQiOUVZ69SWDBBwu41YwF7PYUsX83q8zn0nBeMqz0ggSf33lW4w31fox9c7EjIF01gPArE5uT+d+AwjVKHpd08LWGR9W9NSXVOPUKkzOM+PyvKGvzjMnlrm/feqowKQbL2q/GP0CgYEA/EsrvUojkFIWxHc19KJdJvqlYgLeWq6P/J7UmHgpl+S3nG6b9HH4/aM/ICDa5hxd5bmP5p2V3EuZWnyb6/QB5eipC7Ss3oM7XeS/PwvTp6NTC1fypx2zHKse3iuLeCGneRxiw15mB02ArJ/qJw/VSQK2J7RiR4+b6HYpdzQnIysCgYEAx25dTQqskQqsx/orJzuUqfNv/C0W4vqfz1eL3akFrdK+YqghXKFsDmh61JpTrTKnRLAdQeyOrhKwbNsdxSEEaeeLayKLVlimoFXGd/LZb5LQiwFcrvTzhnB+FLmFgqTnuLkpfY1woHEwSW9TpJewjbT9S6g0L2uh223nVXuLMY0CgYEA3pMOlmMGtvbEoTSuRBDNb2rmZm4zbfrcijgxRAWWZCtiFL68FU5LJLBVK2nw09sot1cabZCOuhdzxhFymRneZs73+5y8eV17DV2VnvA3HIiI5dQD/YzFDECm7ceqtiOylLUHKGZqSn0ETMaTkzxzpIKg4qxPm+RE3jMIZ+J5uJsCgYBk2iUIrtsxxgo2Xwavomu9vkPlbQ/j3QYwHn+2qqEalDZ/QbMNWvyAFMn49cpXDgSUsdM54V0OHpllkzFs3ROUUumoViHMmqw47OefBQp8Z+xaP2gVef4lAIJiDKe9t5MPUWPwADTyjgrzN/8+fw9juiFVv0wUpwOFKgEQs5diiQKBgC6RpZESc5Nl4nHrDvIl5n/zYED6BaXoLl15NhcoBudt5SIRO/RpvBW69A7aE/UK6p7WXjq4mP1ssIWz4KgATCoXUgYvn0a7Ql79r/CMce6/FvcuweED6u6bD0kdXuYhe8fR9IPmLfnnb4Cx3JOJeRZbiBSP5HOZJ7nsKibxcgPm" ], + "keyUse" : [ "SIG" ], + "certificate" : [ "MIIClzCCAX8CBgGIQhOHjjANBgkqhkiG9w0BAQsFADAPMQ0wCwYDVQQDDAR0ZXN0MB4XDTIzMDUyMjA2MDczNloXDTMzMDUyMjA2MDkxNlowDzENMAsGA1UEAwwEdGVzdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMSLM0Ar/G197ClyVbbiyVenB/7oZM8XExCHA3u8tmNkhTNMdpxesES5PEJLyK0ZnpYpXoFCCbbamcS1zKccFCRos3oR0DKzgszaBgWXVWp6Cu5zWufhTmLZby+BsTX8M0t+rKdz5ykIhr+UO0Rrj7YYyxX6k5mMf+xkwyQuLyxFuUtNVcv13PRROBbE9IZ5vdZUVo6aXPfPdzOUFMv0z7vLnSLjVfOyqOcoiMUKnD4gEFyWjeAshmg3GIYc8x3cVabSJNhb7cYaZjca27ZgQsf6Pf6wQ0DU4rPBMeiLVgtj7wte1D78jnGeBb/6jtC+iWdJq6R/eQng1NZBD/iima8CAwEAATANBgkqhkiG9w0BAQsFAAOCAQEAe0Bo1UpGfpOlJiVhp0XWExm8bdxFgXOU2M5XeZBsWAqBehvJkzn+tbAtlVNiIiN58XFFpH+xLZ2nJIZR5FHeCD3bYAgK72j5k45HJI95vPyslelfT/m3Np78+1iUa1U1WxN40JaowP1EeTkk5O8Pk4zTQ1Ne1usmKd+SJxI1KWN0kKuVFMmdNRb5kQKWeQvOSlWl7rd4bvHGvVnxgcPC1bshEJKRt+VpaUjpm6CKd8C3Kt7IWfIX4HTVhKZkmLn7qv6aSfwWelwZfLdaXcLXixqzqNuUk/VWbF9JT4iiag9F3mt7xryIkoRp1AEjCA82HqK72F4JCFyOhCiGrMfKJw==" ], + "priority" : [ "100" ] + } + }, { + "id" : "f30af2d2-d042-43b8-bc6d-22f6bab6934c", + "name" : "hmac-generated", + "providerId" : "hmac-generated", + "subComponents" : { }, + "config" : { + "kid" : [ "6f0d9688-e974-42b4-9d84-8d098c51007c" ], + "secret" : [ "8nruwD66Revr9k21e-BHtcyvNzAMFOsstxSAB0Gdy2qe2qGRm2kYOwsPzrH9ZQSdj2041SraKo6a3SHvCyTBAQ" ], + "priority" : [ "100" ], + "algorithm" : [ "HS256" ] + } + } ] + }, + "internationalizationEnabled" : false, + "supportedLocales" : [ ], + "authenticationFlows" : [ { + "id" : "94c65ba1-ba50-4be2-94c4-de656145eb67", + "alias" : "Account verification options", + "description" : "Method with which to verity the existing account", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "idp-email-verification", + "authenticatorFlow" : false, + "requirement" : "ALTERNATIVE", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticatorFlow" : true, + 
"requirement" : "ALTERNATIVE", + "priority" : 20, + "autheticatorFlow" : true, + "flowAlias" : "Verify Existing Account by Re-authentication", + "userSetupAllowed" : false + } ] + }, { + "id" : "3b706ddf-c4b6-498a-803c-772878bc9bc3", + "alias" : "Authentication Options", + "description" : "Authentication options.", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "basic-auth", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "basic-auth-otp", + "authenticatorFlow" : false, + "requirement" : "DISABLED", + "priority" : 20, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "auth-spnego", + "authenticatorFlow" : false, + "requirement" : "DISABLED", + "priority" : 30, + "autheticatorFlow" : false, + "userSetupAllowed" : false + } ] + }, { + "id" : "9ea0b8f6-882c-45ad-9110-78adf5a5d233", + "alias" : "Browser - Conditional OTP", + "description" : "Flow to determine if the OTP is required for the authentication", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "conditional-user-configured", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "auth-otp-form", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 20, + "autheticatorFlow" : false, + "userSetupAllowed" : false + } ] + }, { + "id" : "99c5ba83-b585-4601-b740-1a26670bf4e9", + "alias" : "Direct Grant - Conditional OTP", + "description" : "Flow to determine if the OTP is required for the authentication", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "conditional-user-configured", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "direct-grant-validate-otp", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 20, + "autheticatorFlow" : false, + "userSetupAllowed" : false + } ] + }, { + "id" : "65b73dec-7dd1-4de8-b542-a023b7104afc", + "alias" : "First broker login - Conditional OTP", + "description" : "Flow to determine if the OTP is required for the authentication", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "conditional-user-configured", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "auth-otp-form", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 20, + "autheticatorFlow" : false, + "userSetupAllowed" : false + } ] + }, { + "id" : "9a26b76f-da95-43f1-8da3-16c4a0654f07", + "alias" : "Handle Existing Account", + "description" : "Handle what to do if there is existing account with same email/username like authenticated identity provider", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "idp-confirm-link", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticatorFlow" : true, + 
"requirement" : "REQUIRED", + "priority" : 20, + "autheticatorFlow" : true, + "flowAlias" : "Account verification options", + "userSetupAllowed" : false + } ] + }, { + "id" : "0a77285e-d7d5-4b6c-aa9a-3eadb5e7e3d3", + "alias" : "Reset - Conditional OTP", + "description" : "Flow to determine if the OTP should be reset or not. Set to REQUIRED to force.", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "conditional-user-configured", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "reset-otp", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 20, + "autheticatorFlow" : false, + "userSetupAllowed" : false + } ] + }, { + "id" : "cb6c0b3b-2f5f-4493-9d14-6130f8b58dd7", + "alias" : "User creation or linking", + "description" : "Flow for the existing/non-existing user alternatives", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticatorConfig" : "create unique user config", + "authenticator" : "idp-create-user-if-unique", + "authenticatorFlow" : false, + "requirement" : "ALTERNATIVE", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticatorFlow" : true, + "requirement" : "ALTERNATIVE", + "priority" : 20, + "autheticatorFlow" : true, + "flowAlias" : "Handle Existing Account", + "userSetupAllowed" : false + } ] + }, { + "id" : "0fd3db1b-e93d-4768-82ca-a1498ddc11d0", + "alias" : "Verify Existing Account by Re-authentication", + "description" : "Reauthentication of existing account", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "idp-username-password-form", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticatorFlow" : true, + "requirement" : "CONDITIONAL", + "priority" : 20, + "autheticatorFlow" : true, + "flowAlias" : "First broker login - Conditional OTP", + "userSetupAllowed" : false + } ] + }, { + "id" : "86610e70-f9f5-4c11-8a9e-9de1770565fb", + "alias" : "browser", + "description" : "browser based authentication", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "auth-cookie", + "authenticatorFlow" : false, + "requirement" : "ALTERNATIVE", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "auth-spnego", + "authenticatorFlow" : false, + "requirement" : "DISABLED", + "priority" : 20, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "identity-provider-redirector", + "authenticatorFlow" : false, + "requirement" : "ALTERNATIVE", + "priority" : 25, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticatorFlow" : true, + "requirement" : "ALTERNATIVE", + "priority" : 30, + "autheticatorFlow" : true, + "flowAlias" : "forms", + "userSetupAllowed" : false + } ] + }, { + "id" : "f6aa23dd-8532-4d92-9780-3ea226481e3b", + "alias" : "clients", + "description" : "Base authentication for clients", + "providerId" : "client-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "client-secret", + "authenticatorFlow" : false, + "requirement" : "ALTERNATIVE", + 
"priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "client-jwt", + "authenticatorFlow" : false, + "requirement" : "ALTERNATIVE", + "priority" : 20, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "client-secret-jwt", + "authenticatorFlow" : false, + "requirement" : "ALTERNATIVE", + "priority" : 30, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "client-x509", + "authenticatorFlow" : false, + "requirement" : "ALTERNATIVE", + "priority" : 40, + "autheticatorFlow" : false, + "userSetupAllowed" : false + } ] + }, { + "id" : "4d2caf65-1703-4ddb-8890-70232e91bcd8", + "alias" : "direct grant", + "description" : "OpenID Connect Resource Owner Grant", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "direct-grant-validate-username", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "direct-grant-validate-password", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 20, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticatorFlow" : true, + "requirement" : "CONDITIONAL", + "priority" : 30, + "autheticatorFlow" : true, + "flowAlias" : "Direct Grant - Conditional OTP", + "userSetupAllowed" : false + } ] + }, { + "id" : "eaa20c41-5334-4fb4-8c45-fb9cc71f7f74", + "alias" : "docker auth", + "description" : "Used by Docker clients to authenticate against the IDP", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "docker-http-basic-authenticator", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + } ] + }, { + "id" : "b9febfb1-f0aa-4590-b782-272a4aa11575", + "alias" : "first broker login", + "description" : "Actions taken after first broker login with identity provider account, which is not yet linked to any Keycloak account", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticatorConfig" : "review profile config", + "authenticator" : "idp-review-profile", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticatorFlow" : true, + "requirement" : "REQUIRED", + "priority" : 20, + "autheticatorFlow" : true, + "flowAlias" : "User creation or linking", + "userSetupAllowed" : false + } ] + }, { + "id" : "03bb6ff4-eccb-4f2f-8953-3769f78c3bf3", + "alias" : "forms", + "description" : "Username, password, otp and other auth forms.", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "auth-username-password-form", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticatorFlow" : true, + "requirement" : "CONDITIONAL", + "priority" : 20, + "autheticatorFlow" : true, + "flowAlias" : "Browser - Conditional OTP", + "userSetupAllowed" : false + } ] + }, { + "id" : "38385189-246b-4ea0-ac05-d49dfe1709da", + "alias" : "http challenge", + "description" : "An authentication flow based on challenge-response HTTP Authentication Schemes", + "providerId" : 
"basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "no-cookie-redirect", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticatorFlow" : true, + "requirement" : "REQUIRED", + "priority" : 20, + "autheticatorFlow" : true, + "flowAlias" : "Authentication Options", + "userSetupAllowed" : false + } ] + }, { + "id" : "1022f3c2-0469-41c9-861e-918908f103df", + "alias" : "registration", + "description" : "registration flow", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "registration-page-form", + "authenticatorFlow" : true, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : true, + "flowAlias" : "registration form", + "userSetupAllowed" : false + } ] + }, { + "id" : "00d36c3b-e1dc-41f8-bfd0-5f8c80ea07e8", + "alias" : "registration form", + "description" : "registration form", + "providerId" : "form-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "registration-user-creation", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 20, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "registration-profile-action", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 40, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "registration-password-action", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 50, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "registration-recaptcha-action", + "authenticatorFlow" : false, + "requirement" : "DISABLED", + "priority" : 60, + "autheticatorFlow" : false, + "userSetupAllowed" : false + } ] + }, { + "id" : "4374c16e-8c65-4168-94c2-df1ab3f3e6ad", + "alias" : "reset credentials", + "description" : "Reset credentials for a user if they forgot their password or something", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "reset-credentials-choose-user", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "reset-credential-email", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 20, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticator" : "reset-password", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 30, + "autheticatorFlow" : false, + "userSetupAllowed" : false + }, { + "authenticatorFlow" : true, + "requirement" : "CONDITIONAL", + "priority" : 40, + "autheticatorFlow" : true, + "flowAlias" : "Reset - Conditional OTP", + "userSetupAllowed" : false + } ] + }, { + "id" : "04d6ed6a-76c9-41fb-9074-bff8a80c2286", + "alias" : "saml ecp", + "description" : "SAML ECP Profile Authentication Flow", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "http-basic-authenticator", + "authenticatorFlow" : false, + "requirement" : "REQUIRED", + "priority" : 10, + "autheticatorFlow" : false, + "userSetupAllowed" : false + } ] + } ], + "authenticatorConfig" : [ { + "id" : "e7bad67d-1236-430a-a327-9194f9d1e2b0", + "alias" : "create unique user 
config", + "config" : { + "require.password.update.after.registration" : "false" + } + }, { + "id" : "287b5989-a927-4cf5-8067-74594ce19bc1", + "alias" : "review profile config", + "config" : { + "update.profile.on.first.login" : "missing" + } + } ], + "requiredActions" : [ { + "alias" : "CONFIGURE_TOTP", + "name" : "Configure OTP", + "providerId" : "CONFIGURE_TOTP", + "enabled" : true, + "defaultAction" : false, + "priority" : 10, + "config" : { } + }, { + "alias" : "terms_and_conditions", + "name" : "Terms and Conditions", + "providerId" : "terms_and_conditions", + "enabled" : false, + "defaultAction" : false, + "priority" : 20, + "config" : { } + }, { + "alias" : "UPDATE_PASSWORD", + "name" : "Update Password", + "providerId" : "UPDATE_PASSWORD", + "enabled" : true, + "defaultAction" : false, + "priority" : 30, + "config" : { } + }, { + "alias" : "UPDATE_PROFILE", + "name" : "Update Profile", + "providerId" : "UPDATE_PROFILE", + "enabled" : true, + "defaultAction" : false, + "priority" : 40, + "config" : { } + }, { + "alias" : "VERIFY_EMAIL", + "name" : "Verify Email", + "providerId" : "VERIFY_EMAIL", + "enabled" : true, + "defaultAction" : false, + "priority" : 50, + "config" : { } + }, { + "alias" : "delete_account", + "name" : "Delete Account", + "providerId" : "delete_account", + "enabled" : false, + "defaultAction" : false, + "priority" : 60, + "config" : { } + }, { + "alias" : "webauthn-register", + "name" : "Webauthn Register", + "providerId" : "webauthn-register", + "enabled" : true, + "defaultAction" : false, + "priority" : 70, + "config" : { } + }, { + "alias" : "webauthn-register-passwordless", + "name" : "Webauthn Register Passwordless", + "providerId" : "webauthn-register-passwordless", + "enabled" : true, + "defaultAction" : false, + "priority" : 80, + "config" : { } + }, { + "alias" : "update_user_locale", + "name" : "Update User Locale", + "providerId" : "update_user_locale", + "enabled" : true, + "defaultAction" : false, + "priority" : 1000, + "config" : { } + } ], + "browserFlow" : "browser", + "registrationFlow" : "registration", + "directGrantFlow" : "direct grant", + "resetCredentialsFlow" : "reset credentials", + "clientAuthenticationFlow" : "clients", + "dockerAuthenticationFlow" : "docker auth", + "attributes" : { + "cibaBackchannelTokenDeliveryMode" : "poll", + "cibaAuthRequestedUserHint" : "login_hint", + "oauth2DevicePollingInterval" : "5", + "clientOfflineSessionMaxLifespan" : "0", + "clientSessionIdleTimeout" : "0", + "clientOfflineSessionIdleTimeout" : "0", + "cibaInterval" : "5", + "cibaExpiresIn" : "120", + "oauth2DeviceCodeLifespan" : "600", + "parRequestUriLifespan" : "60", + "clientSessionMaxLifespan" : "0", + "frontendUrl" : "" + }, + "keycloakVersion" : "19.0.3", + "userManagedAccessAllowed" : false, + "clientProfiles" : { + "profiles" : [ ] + }, + "clientPolicies" : { + "policies" : [ ] + } +} \ No newline at end of file diff --git a/conf/localstack/buckets.sh b/conf/localstack/buckets.sh new file mode 100755 index 00000000000..fe940d9890d --- /dev/null +++ b/conf/localstack/buckets.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +# https://stackoverflow.com/questions/53619901/auto-create-s3-buckets-on-localstack +awslocal s3 mb s3://mybucket diff --git a/conf/proxy/Caddyfile b/conf/proxy/Caddyfile new file mode 100644 index 00000000000..70e6904d26e --- /dev/null +++ b/conf/proxy/Caddyfile @@ -0,0 +1,12 @@ +# This configuration is intended to be used with Caddy, a very small high perf proxy. 
+# It will serve the application container's Payara Admin GUI via HTTP instead of HTTPS, +# avoiding the trouble of self-signed certificates for local development. + +:4848 { + reverse_proxy https://dataverse:4848 { + transport http { + tls_insecure_skip_verify + } + header_down Location "^https://" "http://" + } +} diff --git a/conf/solr/9.3.0/schema.xml b/conf/solr/9.3.0/schema.xml index 3c15b659c4e..90e9287d659 100644 --- a/conf/solr/9.3.0/schema.xml +++ b/conf/solr/9.3.0/schema.xml @@ -229,6 +229,8 @@ + + - + @@ -327,7 +329,7 @@ - + @@ -370,7 +372,7 @@ - + @@ -566,7 +568,7 @@ - + @@ -609,7 +611,7 @@ - + diff --git a/conf/solr/9.3.0/solrconfig.xml b/conf/solr/9.3.0/solrconfig.xml index b89315cdaa9..36ed4f23390 100644 --- a/conf/solr/9.3.0/solrconfig.xml +++ b/conf/solr/9.3.0/solrconfig.xml @@ -588,6 +588,7 @@ check for "Circuit Breakers tripped" in logs and the corresponding error message should tell you what transpired (if the failure was caused by tripped circuit breakers). --> + + + @@ -24,15 +24,15 @@
  • - + Community
  • @@ -49,18 +49,18 @@
  • - + Contact
  • diff --git a/doc/sphinx-guides/source/admin/collectionquotas.rst b/doc/sphinx-guides/source/admin/collectionquotas.rst new file mode 100644 index 00000000000..2ce3132e2ba --- /dev/null +++ b/doc/sphinx-guides/source/admin/collectionquotas.rst @@ -0,0 +1,19 @@ + +Storage Quotas for Collections +============================== + +Please note that this is a new and still experimental feature (as of the Dataverse v6.1 release). + +Instance admins can now define storage quota limits for specific collections. These limits can be set, changed and/or deleted via the provided APIs (please see the :ref:`collection-storage-quotas` section of the :doc:`/api/native-api` guide). The Read version of the API is available to the individual collection admins (i.e., a collection owner can check on the quota configured for their collection), but only superusers can set, change or disable storage quotas. + +Storage quotas are *inherited* by subcollections. In other words, when a storage use limit is set for a specific collection, it applies to all the datasets immediately under it and in its sub-collections, unless different quotas are defined there, and so on. Each file added to any dataset in that hierarchy counts for the purposes of the quota limit defined for the top collection. A storage quota defined on a child sub-collection overrides whatever quota may be defined on the parent, or inherited from an ancestor. + +For example, a collection ``A`` has the storage quota set to 10GB. It has 3 sub-collections, ``B``, ``C`` and ``D``. Users can keep uploading files into the datasets anywhere in this hierarchy until the combined size of 10GB is reached between them. However, if an admin has reasons to limit one of the sub-collections, ``B``, to 3GB only, that quota can be explicitly set there. This both limits the growth of ``B`` to 3GB, and also *guarantees* that allocation to it. That is, the contributors to collection ``B`` will be able to keep adding data until the 3GB limit is reached, even after the parent collection ``A`` reaches the combined 10GB limit (at which point ``A`` and all its subcollections except for ``B`` will become read-only). + +We do not yet know whether this is going to be a popular or needed use case - a child collection quota that is different from the quota it inherits from a parent. It is likely that for many instances it will be sufficient to be able to define quotas for collections and have them apply to all the child objects underneath. We will examine the response to this feature and consider making adjustments to this scheme based on it. We are already considering introducing other types of quotas, such as limits by users or specific storage volumes. + +Please note that only the sizes of the main datafiles and the archival tab-delimited format versions, as produced by the ingest process, are counted for the purposes of enforcing the limits. Automatically generated "auxiliary" files, such as rescaled image thumbnails and metadata exports for datasets, are not. + +When quotas are set and enforced, users will be informed of the remaining storage allocation on the file upload page together with other upload and processing limits. + +Part of the new and experimental nature of this feature is that we don't know for a fact yet how well it will function in real life on a very busy production system, despite our best efforts to test it prior to the release. 
One specific issue is having to update the recorded storage use for every parent collection of the given dataset whenever new files are added. This includes updating the combined size of the root (top) collection, which will need to be updated after *every* file upload. In the unlikely case that this starts causing problems with race conditions and database update conflicts, it is possible to disable these updates (and thus disable the storage quotas feature) by setting the :ref:`dataverse.storageuse.disable-storageuse-increments` JVM setting to true. diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index 170807d3d67..37494c57fa1 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -53,11 +53,15 @@ Configure a Dataverse Collection to Store All New Files in a Specific File Store To direct new files (uploaded when datasets are created or edited) for all datasets in a given Dataverse collection, the store can be specified via the API as shown below, or by editing the 'General Information' for a Dataverse collection on the Dataverse collection page. Only accessible to superusers. :: curl -H "X-Dataverse-key: $API_TOKEN" -X PUT -d $storageDriverLabel http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver + +(Note that for ``dataverse.files.store1.label=MyLabel``, you should pass ``MyLabel``.) The current driver can be seen using:: curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver +(Note that for ``dataverse.files.store1.label=MyLabel``, ``store1`` will be returned.) + and can be reset to the default store with:: curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver diff --git a/doc/sphinx-guides/source/admin/external-tools.rst b/doc/sphinx-guides/source/admin/external-tools.rst index 67075e986bb..346ca0b15ee 100644 --- a/doc/sphinx-guides/source/admin/external-tools.rst +++ b/doc/sphinx-guides/source/admin/external-tools.rst @@ -115,7 +115,7 @@ Dataset level explore tools allow the user to explore all the files in a dataset Dataset Level Configure Tools +++++++++++++++++++++++++++++ -Configure tools at the dataset level are not currently supported. +Dataset level configure tools can be launched by users who have edit access to the dataset. These tools are found under the "Edit Dataset" menu. Writing Your Own External Tool ------------------------------ diff --git a/doc/sphinx-guides/source/admin/index.rst b/doc/sphinx-guides/source/admin/index.rst index ac81aa737a7..633842044b4 100755 --- a/doc/sphinx-guides/source/admin/index.rst +++ b/doc/sphinx-guides/source/admin/index.rst @@ -27,6 +27,7 @@ This guide documents the functionality only available to superusers (such as "da solr-search-index ip-groups mail-groups + collectionquotas monitoring reporting-tools-and-queries maintenance diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index 21adf8338d9..2b6bdb8eeb5 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -121,6 +121,18 @@ Its goal is to make the dashboard adjustable for a Dataverse installation's need The integrations dashboard is currently in development. 
A preview and more information can be found at: `rdm-integration GitHub repository `_ +Globus +++++++ + +Globus transfer uses an efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: + +* robust file transfer capable of restarting after network or endpoint failures +* third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (e.g. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation + +Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use one or more Globus-accessible stores +and a community-developed `dataverse-globus `_ app has been properly installed and configured. + + Embedding Data on Websites -------------------------- @@ -185,6 +197,16 @@ Avgidea Data Search Researchers can use a Google Sheets add-on to search for Dataverse installation's CSV data and then import that data into a sheet. See `Avgidea Data Search `_ for details. +JupyterHub ++++++++++ + +The `Dataverse-to-JupyterHub Data Transfer Connector `_ streamlines data transfer between Dataverse repositories and the cloud-based platform JupyterHub, enhancing collaborative research. +This connector facilitates seamless two-way transfer of datasets and files, emphasizing the potential of an integrated research environment. +It is a lightweight client-side web application built using React and relying on the Dataverse External Tool feature, allowing for easy deployment on modern integration systems. Currently, it supports small to medium-sized files, with plans to enable support for large files and signed Dataverse endpoints in the future. + +What kind of user is the feature intended for? +The feature is intended for researchers, scientists, and data analysts who are working with Dataverse instances and JupyterHub and are looking to ease the data transfer process. See `presentation `_ for details. + .. _integrations-discovery: Discoverability @@ -202,6 +224,11 @@ Geodisy `Geodisy `_ will take your Dataverse installation’s data, search for geospatial metadata and files, and copy them to a new system that allows for visual searching. Your original data and search methods are untouched; you have the benefit of both. For more information, please refer to `Geodisy's GitHub Repository. `_ +DataONE ++++++++ + +`DataONE `_ is a community-driven program providing access to data across multiple `member repositories `_, supporting enhanced search and discovery of Earth and environmental data. Membership is free and is most easily achieved by providing schema.org data via `science-on-schema.org `_ metadata markup on dataset landing pages, support for which is native in Dataverse. Dataverse installations are welcome to `join the network `_ to have their datasets included. + Research Data Preservation -------------------------- @@ -217,7 +244,14 @@ Sponsored by the `Ontario Council of University Libraries (OCUL) `_ zipped `BagIt `_ bags to the `Chronopolis `_ via `DuraCloud `_, to a local file system, or to `Google Cloud Storage `_. +A Dataverse installation can be configured to submit a copy of published Dataset versions, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ bags to `Chronopolis `_ via `DuraCloud `_, a local file system, any S3 store, or to `Google Cloud Storage `_. 
+Submission can be automated to occur upon publication, or can be done periodically (via external scripting). +The archival status of each Dataset version can be seen in the Dataset page version table and queried via API. + +The archival Bags include all of the files and metadata in a given dataset version and are sufficient to recreate the dataset, e.g. in a new Dataverse instance, or potentially in another RDA-conformant repository. +Specifically, the archival Bags include an OAI-ORE Map serialized as JSON-LD that describes the dataset and its files, as well as information about the version of Dataverse used to export the archival Bag. + +The `DVUploader `_ includes functionality to recreate a Dataset from an archival Bag produced by Dataverse (using the Dataverse API to do so). For details on how to configure this integration, see :ref:`BagIt Export` in the :doc:`/installation/config` section of the Installation Guide. @@ -226,7 +260,7 @@ Future Integrations The `Dataverse Project Roadmap `_ is a good place to see integrations that the core Dataverse Project team is working on. -The `Community Dev `_ column of our project board is a good way to track integrations that are being worked on by the Dataverse Community but many are not listed and if you have an idea for an integration, please ask on the `dataverse-community `_ mailing list if someone is already working on it. +If you have an idea for an integration, please ask on the `dataverse-community `_ mailing list if someone is already working on it. Many integrations take the form of "external tools". See the :doc:`external-tools` section for details. External tool makers should check out the :doc:`/api/external-tools` section of the API Guide. diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 4f737bd730b..66911aa0ad1 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -37,8 +37,8 @@ tab-separated value (TSV). [1]_\ :sup:`,`\ [2]_ While it is technically possible to define more than one metadata block in a TSV file, it is good organizational practice to define only one in each file. -The metadata block TSVs shipped with the Dataverse Software are in `/tree/develop/scripts/api/data/metadatablocks -`__ and the corresponding ResourceBundle property files `/tree/develop/src/main/java `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets +The metadata block TSVs shipped with the Dataverse Software are in `/scripts/api/data/metadatablocks +`__ with the corresponding ResourceBundle property files in `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets document `__ but they tend to get out of sync with the TSV files, which should be considered authoritative. The Dataverse Software installation process operates on the TSVs, not the Google spreadsheet. About the metadata block TSV @@ -413,7 +413,7 @@ Setting Up a Dev Environment for Testing You have several options for setting up a dev environment for testing metadata block changes: -- Docker: See :doc:`/container/index`. +- Docker: See :doc:`/container/running/metadata-blocks` in the Container Guide. - AWS deployment: See the :doc:`/developers/deployment` section of the Developer Guide. - Full dev environment: See the :doc:`/developers/dev-environment` section of the Developer Guide. 
@@ -648,6 +648,28 @@ Alternatively, you are welcome to request "edit" access to this "Tips for Datave The thinking is that the tips can become issues and the issues can eventually be worked on as features to improve the Dataverse Software metadata system. +Development Tasks Specific to Changing Fields in Core Metadata Blocks +--------------------------------------------------------------------- + +When it comes to the fields from the core blocks that are distributed with Dataverse (such as the Citation, Social Science and Geospatial blocks), code dependencies may exist in Dataverse, primarily in the Import and Export subsystems, on these fields being configured a certain way. So, if it becomes necessary to modify one such core field, code changes may be necessary to accompany the change in the block TSV, and some sample and test files maintained in the Dataverse source tree will need to be adjusted accordingly. + +Making a Field Multi-Valued +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As a recent real-life example, a few fields from the Citation and Social Science blocks were changed to support multiple values, in order to accommodate specific needs of some community member institutions. A PR for one of these fields, ``alternativeTitle`` from the Citation block, is linked below. Each time, a number of code changes had to be made, plus some changes in the sample metadata files in the Dataverse code tree. The checklist below is to help another developer in the event that a similar change becomes necessary in the future. Note that some of the steps below may not apply 1:1 to a different metadata field, depending on how it is exported and imported in various formats by Dataverse. It may help to consult the PR `#9440 `_ as a specific example of the changes that had to be made for the ``alternativeTitle`` field. (A quick verification sketch follows the checklist below.) + +- Change the value from ``FALSE`` to ``TRUE`` in the ``allowmultiples`` column of the .tsv file for the block. +- Change the value of the ``multiValued`` attribute for the search field in the Solr schema (``conf/solr/x.x.x/schema.xml``). +- Modify the DDI import code (``ImportDDIServiceBean.java``) to support multiple values. (You may be able to use the change in the PR above as a model.) +- Modify the DDI export utility (``DdiExportUtil.java``). +- Modify the OpenAire export utility (``OpenAireExportUtil.java``). +- Modify the following JSON source files in the Dataverse code tree to actually include multiple values for the field (two should be quite enough!): ``scripts/api/data/dataset-create-new-all-default-fields.json``, ``src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt``, ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json``. (These are used as examples for populating datasets via the import API and by the automated import and export code tests). +- Similarly modify the following XML files that are used by the DDI export code tests: ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml``. +- Make sure all the automated unit and integration tests are passing. See :doc:`/developers/testing` in the Developer Guide. +- Write a short release note to announce the change in the upcoming release. See :ref:`writing-release-note-snippets` in the Developer Guide. +- Make a pull request. 
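+
+After applying the checklist, a quick sanity check of the two configuration pieces can help (an illustrative sketch using the ``alternativeTitle`` example; adjust the field name, block TSV and Solr version to your case):
+
+.. code-block:: bash
+
+  # Confirm the field's allowmultiples column is now TRUE in the block TSV
+  grep -n 'alternativeTitle' scripts/api/data/metadatablocks/citation.tsv
+
+  # Confirm the matching Solr search field is declared with multiValued="true"
+  grep -n 'alternativeTitle' conf/solr/9.3.0/schema.xml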
+ + Footnotes --------- diff --git a/doc/sphinx-guides/source/admin/monitoring.rst b/doc/sphinx-guides/source/admin/monitoring.rst index a4affda1302..04fba23a3e8 100644 --- a/doc/sphinx-guides/source/admin/monitoring.rst +++ b/doc/sphinx-guides/source/admin/monitoring.rst @@ -1,7 +1,7 @@ Monitoring =========== -Once you're in production, you'll want to set up some monitoring. This page may serve as a starting point for you but you are encouraged to share your ideas with the Dataverse community! +Once you're in production, you'll want to set up some monitoring. This page may serve as a starting point for you but you are encouraged to share your ideas with the Dataverse community! You may also be interested in the :doc:`/developers/performance` section of the Developer Guide. .. contents:: Contents: :local: @@ -14,7 +14,7 @@ In production you'll want to monitor the usual suspects such as CPU, memory, fre Munin +++++ -http://munin-monitoring.org says, "A default installation provides a lot of graphs with almost no work." From RHEL or CentOS 7, you can try the following steps. +https://munin-monitoring.org says, "A default installation provides a lot of graphs with almost no work." From RHEL or CentOS 7, you can try the following steps. Enable the EPEL yum repo (if you haven't already): diff --git a/doc/sphinx-guides/source/admin/solr-search-index.rst b/doc/sphinx-guides/source/admin/solr-search-index.rst index e6f7b588ede..3f7b9d5b547 100644 --- a/doc/sphinx-guides/source/admin/solr-search-index.rst +++ b/doc/sphinx-guides/source/admin/solr-search-index.rst @@ -26,8 +26,8 @@ Remove all Solr documents that are orphaned (i.e. not associated with objects in ``curl http://localhost:8080/api/admin/index/clear-orphans`` -Clearing Data from Solr -~~~~~~~~~~~~~~~~~~~~~~~ +Clearing ALL Data from Solr +~~~~~~~~~~~~~~~~~~~~~~~~~~~ Please note that the moment you issue this command, it will appear to end users looking at the root Dataverse installation page that all data is gone! This is because the root Dataverse installation page is powered by the search index. @@ -86,6 +86,16 @@ To re-index a dataset by its database ID: ``curl http://localhost:8080/api/admin/index/datasets/7504557`` +Clearing a Dataset from Solr +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API will clear the Solr entry for the dataset specified. It can be useful if you have reasons to want to hide a published dataset from appearing in search results and/or on Collection pages, but don't want to destroy and purge it from the database just yet. + +``curl -X DELETE http://localhost:8080/api/admin/index/datasets/`` + +This can, of course, be reversed by re-indexing the dataset with the API above. + + Manually Querying Solr ---------------------- diff --git a/doc/sphinx-guides/source/api/apps.rst b/doc/sphinx-guides/source/api/apps.rst index a498c62d3d4..44db666736c 100755 --- a/doc/sphinx-guides/source/api/apps.rst +++ b/doc/sphinx-guides/source/api/apps.rst @@ -94,6 +94,13 @@ This series of Python scripts offers a starting point for migrating datasets fro https://github.com/scholarsportal/dataverse-migration-scripts +idsc.dataverse +~~~~~~~~~~~~~~ + +This module can, among other things, help you migrate one dataverse to another. 
(see `migrate.md `_) + +https://github.com/iza-institute-of-labor-economics/idsc.dataverse + Java ---- diff --git a/doc/sphinx-guides/source/api/auth.rst b/doc/sphinx-guides/source/api/auth.rst index bbc81b595e3..eae3bd3c969 100644 --- a/doc/sphinx-guides/source/api/auth.rst +++ b/doc/sphinx-guides/source/api/auth.rst @@ -77,6 +77,11 @@ To test if bearer tokens are working, you can try something like the following ( .. code-block:: bash - export TOKEN=`curl -s -X POST --location "http://keycloak.mydomain.com:8090/realms/oidc-realm/protocol/openid-connect/token" -H "Content-Type: application/x-www-form-urlencoded" -d "username=kcuser&password=kcpassword&grant_type=password&client_id=oidc-client&client_secret=ss6gE8mODCDfqesQaSG3gwUwZqZt547E" | jq '.access_token' -r | tr -d "\n"` + export TOKEN=`curl -s -X POST --location "http://keycloak.mydomain.com:8090/realms/test/protocol/openid-connect/token" -H "Content-Type: application/x-www-form-urlencoded" -d "username=user&password=user&grant_type=password&client_id=test&client_secret=94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8" | jq '.access_token' -r | tr -d "\n"` curl -H "Authorization: Bearer $TOKEN" http://localhost:8080/api/users/:me + +Signed URLs +----------- + +See :ref:`signed-urls`. diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst new file mode 100644 index 00000000000..025a2069d6e --- /dev/null +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -0,0 +1,27 @@ +API Changelog (Breaking Changes) +================================ + +This API changelog is experimental and we would love feedback on its usefulness. Its primary purpose is to inform API developers of any breaking changes. (We try not to ship any backward-incompatible changes, but it happens.) To see a list of new APIs and backward-compatible changes to existing APIs, please see each version's release notes at https://github.com/IQSS/dataverse/releases + +.. contents:: |toctitle| + :local: + :depth: 1 + +v6.2 +---- + +- The fields "northLongitude" and "southLongitude" have been deprecated in favor of "northLatitude" and "southLatitude" in the Geolocation metadata block. After upgrading to 6.2 or later, you will need to use the new fields when creating or updating a dataset. + +- **/api/datasets/{id}/versions/{versionId}**: The includeFiles parameter has been renamed to excludeFiles. The default behavior remains the same, which is to include files. However, when excludeFiles is set to true, the files will be excluded. A bug that caused the API to only return a deaccessioned dataset if the user had edit privileges has been fixed. +- **/api/datasets/{id}/versions**: The includeFiles parameter has been renamed to excludeFiles. The default behavior remains the same, which is to include files. However, when excludeFiles is set to true, the files will be excluded. +- **/api/files/$ID/uningest**: Can now be used by users with the ability to publish the dataset to undo a failed ingest. (Removing a successful ingest still requires being a superuser.) + +v6.1 +---- + +- The metadata field "Alternative Title" now supports multiple values, so you must pass an array rather than a string when populating that field via API. See https://github.com/IQSS/dataverse/pull/9440 + +v6.0 +---- + +- **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result in a ``401`` error response. Previously, the download was allowed (``200`` response). 
Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. See :doc:`dataaccess`. \ No newline at end of file diff --git a/doc/sphinx-guides/source/api/client-libraries.rst b/doc/sphinx-guides/source/api/client-libraries.rst index 62069f62c23..bd0aa55ba99 100755 --- a/doc/sphinx-guides/source/api/client-libraries.rst +++ b/doc/sphinx-guides/source/api/client-libraries.rst @@ -24,7 +24,7 @@ Java https://github.com/IQSS/dataverse-client-java is the official Java library for Dataverse APIs. -`Richard Adams `_ from `ResearchSpace `_ created and maintains this library. +`Richard Adams `_ from `ResearchSpace `_ created and maintains this library. Javascript ---------- @@ -52,20 +52,25 @@ There are multiple Python modules for interacting with Dataverse APIs. `EasyDataverse `_ is a Python library designed to simplify the management of Dataverse datasets in an object-oriented way, giving users the ability to upload, download, and update datasets with ease. By utilizing metadata block configurations, EasyDataverse automatically generates Python objects that contain all the necessary details required to create the native Dataverse JSON format used to create or edit datasets. Adding files and directories is also possible with EasyDataverse and requires no additional API calls. This library is particularly well-suited for client applications such as workflows and scripts as it minimizes technical complexities and facilitates swift development. -`pyDataverse `_ primarily allows developers to manage Dataverse collections, datasets and datafiles. Its intention is to help with data migrations and DevOps activities such as testing and configuration management. The module is developed by `Stefan Kasberger `_ from `AUSSDA - The Austrian Social Science Data Archive `_. +`python-dvuploader `_ implements Jim Myers' excellent `dv-uploader `_ as a Python module. It offers parallel direct uploads to Dataverse backend storage, streams files directly instead of buffering them in memory, and supports multi-part uploads, chunking data accordingly. + +`pyDataverse `_ primarily allows developers to manage Dataverse collections, datasets and datafiles. Its intention is to help with data migrations and DevOps activities such as testing and configuration management. The module is developed by `Stefan Kasberger `_ from `AUSSDA - The Austrian Social Science Data Archive `_. + +`UBC's Dataverse Utilities `_ are a set of Python console utilities which allow one to upload datasets from a tab-separated-value spreadsheet, bulk release multiple datasets, bulk delete unpublished datasets, quickly duplicate records, replace licenses, and more. For additional information, see their `PyPI page `_. `dataverse-client-python `_ had its initial release in 2015. `Robert Liebowitz `_ created this library while at the `Center for Open Science (COS) `_ and the COS uses it to integrate the `Open Science Framework (OSF) `_ with Dataverse installations via an add-on which itself is open source and listed on the :doc:`/api/apps` page. `Pooch `_ is a Python library that allows library and application developers to download data. Among other features, it takes care of various protocols, caching in OS-specific locations, checksum verification and adds optional features like progress bars or log messages. 
Among other popular repositories, Pooch supports Dataverse in the sense that you can reference Dataverse-hosted datasets by just a DOI and Pooch will determine the data repository type, query the Dataverse API for contained files and checksums, giving you an easy interface to download them. +`idsc.dataverse `_ reads the metadata and files of datasets from a dataverse (e.g. dataverse.example1.com) and writes them into ~/.idsc/dataverse/api/dataverse.example1.com, organized in directories PID_type/prefix/suffix, where PID_type is one of: hdl, doi or ark. It can then "export" the local copy of the dataverse from ~/.idsc/dataverse/api/dataverse.example1.com to ~/.idsc/.cache/dataverse.example2.com so that one can upload it to dataverse.example2.com. + R - https://github.com/IQSS/dataverse-client-r is the official R package for Dataverse APIs. The latest release can be installed from `CRAN `_. The R client can search and download datasets. It is useful when automatically (instead of manually) downloading data files as part of a script. For bulk edit and upload operations, we currently recommend pyDataverse. -The package is currently maintained by `Shiro Kuriwaki `_. It was originally created by `Thomas Leeper `_ and then formerly maintained by `Will Beasley `_. - +The package is currently maintained by `Shiro Kuriwaki `_. It was originally created by `Thomas Leeper `_ and then formerly maintained by `Will Beasley `_. Ruby ---- diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst index e76ea167587..f7aaa8f4ee4 100755 --- a/doc/sphinx-guides/source/api/dataaccess.rst +++ b/doc/sphinx-guides/source/api/dataaccess.rst @@ -83,7 +83,7 @@ Basic access URI: ``/api/access/datafile/$id`` -.. note:: Files can be accessed using persistent identifiers. This is done by passing the constant ``:persistentId`` where the numeric id of the file is expected, and then passing the actual persistent id as a query parameter with the name ``persistentId``. +.. note:: Files can be accessed using persistent identifiers. This is done by passing the constant ``:persistentId`` where the numeric id of the file is expected, and then passing the actual persistent id as a query parameter with the name ``persistentId``. However, this file access method is only effective when the FilePIDsEnabled option is enabled, which can be done by the admin. For further information, refer to :ref:`:FilePIDsEnabled`. Example: Getting the file whose DOI is *10.5072/FK2/J8SJZB* :: @@ -403,3 +403,32 @@ This method returns a list of Authenticated Users who have requested access to t A curl example using an ``id``:: curl -H "X-Dataverse-key:$API_TOKEN" -X GET http://$SERVER/api/access/datafile/{id}/listRequests + +User Has Requested Access to a File: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``/api/access/datafile/{id}/userFileAccessRequested`` + +This method returns true or false depending on whether or not the calling user has requested access to a particular file. + +A curl example using an ``id``:: + + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "http://$SERVER/api/access/datafile/{id}/userFileAccessRequested" + + +Get User Permissions on a File: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``/api/access/datafile/{id}/userPermissions`` + +This method returns the permissions that the calling user has on a particular file. 
+ +In particular, the user permissions that this method checks, returned as booleans, are the following: + +* Can download the file +* Can manage the file permissions +* Can edit the dataset that owns the file + +A curl example using an ``id``:: + + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "http://$SERVER/api/access/datafile/{id}/userPermissions" diff --git a/doc/sphinx-guides/source/api/external-tools.rst b/doc/sphinx-guides/source/api/external-tools.rst index 05affaf975e..ae0e44b36aa 100644 --- a/doc/sphinx-guides/source/api/external-tools.rst +++ b/doc/sphinx-guides/source/api/external-tools.rst @@ -11,7 +11,7 @@ Introduction External tools are additional applications the user can access or open from your Dataverse installation to preview, explore, and manipulate data files and datasets. The term "external" is used to indicate that the tool is not part of the main Dataverse Software. -Once you have created the external tool itself (which is most of the work!), you need to teach a Dataverse installation how to construct URLs that your tool needs to operate. For example, if you've deployed your tool to fabulousfiletool.com your tool might want the ID of a file and the siteUrl of the Dataverse installation like this: https://fabulousfiletool.com?fileId=42&siteUrl=http://demo.dataverse.org +Once you have created the external tool itself (which is most of the work!), you need to teach a Dataverse installation how to construct URLs that your tool needs to operate. For example, if you've deployed your tool to fabulousfiletool.com your tool might want the ID of a file and the siteUrl of the Dataverse installation like this: https://fabulousfiletool.com?fileId=42&siteUrl=https://demo.dataverse.org In short, you will be creating a manifest in JSON format that describes not only how to construct URLs for your tool, but also what types of files your tool operates on, where it should appear in the Dataverse installation web interfaces, etc. @@ -40,7 +40,7 @@ How External Tools Are Presented to Users An external tool can appear in your Dataverse installation in a variety of ways: - as an explore, preview, query or configure option for a file -- as an explore option for a dataset +- as an explore or configure option for a dataset - as an embedded preview on the file landing page See also the :ref:`testing-external-tools` section of the Admin Guide for some perspective on how Dataverse installations will expect to test your tool before announcing it to their users. @@ -88,11 +88,11 @@ Terminology displayName The **name** of the tool in the Dataverse installation web interface. For example, "Data Explorer". - description The **description** of the tool, which appears in a popup (for configure tools only) so the user who clicked the tool can learn about the tool before being redirected the tool in a new tab in their browser. HTML is supported. + description The **description** of the tool, which appears in a popup (for configure tools only) so the user who clicked the tool can learn about the tool before being redirected to the tool in a new tab in their browser. HTML is supported. scope Whether the external tool appears and operates at the **file** level or the **dataset** level. Note that a file level tool must also specify the type of file it operates on (see "contentType" below). - types Whether the external tool is an **explore** tool, a **preview** tool, a **query** tool, a **configure** tool or any combination of these (multiple types are supported for a single tool). 
Configure tools require an API token because they make changes to data files (files within datasets). Configure tools are currently not supported at the dataset level. The older "type" keyword that allows you to pass a single type as a string is deprecated but still supported. + types Whether the external tool is an **explore** tool, a **preview** tool, a **query** tool, a **configure** tool or any combination of these (multiple types are supported for a single tool). Configure tools require an API token because they make changes to data files (files within datasets). The older "type" keyword that allows you to pass a single type as a string is deprecated but still supported. toolUrl The **base URL** of the tool before query parameters are added. @@ -102,7 +102,7 @@ Terminology httpMethod Either ``GET`` or ``POST``. - queryParameters **Key/value combinations** that can be appended to the toolUrl. For example, once substitution takes place (described below) the user may be redirected to ``https://fabulousfiletool.com?fileId=42&siteUrl=http://demo.dataverse.org``. + queryParameters **Key/value combinations** that can be appended to the toolUrl. For example, once substitution takes place (described below) the user may be redirected to ``https://fabulousfiletool.com?fileId=42&siteUrl=https://demo.dataverse.org``. query parameter keys An **arbitrary string** to associate with a value that is populated with a reserved word (described below). As the author of the tool, you have control over what "key" you would like to be passed to your tool. For example, if you want to have your tool receive and operate on the query parameter "dataverseFileId=42" instead of just "fileId=42", that's fine. @@ -160,17 +160,25 @@ Authorization Options When called for datasets or data files that are not public (i.e. in a draft dataset or for a restricted file), external tools are allowed access via the user's credentials. This is accomplished by one of two mechanisms: -* Signed URLs (more secure, recommended) +.. _signed-urls: - - Configured via the ``allowedApiCalls`` section of the manifest. The tool will be provided with signed URLs allowing the specified access to the given dataset or datafile for the specified amount of time. The tool will not be able to access any other datasets or files the user may have access to and will not be able to make calls other than those specified. - - For tools invoked via a GET call, Dataverse will include a callback query parameter with a Base64 encoded value. The decoded value is a signed URL that can be called to retrieve a JSON response containing all of the queryParameters and allowedApiCalls specified in the manfiest. - - For tools invoked via POST, Dataverse will send a JSON body including the requested queryParameters and allowedApiCalls. Dataverse expects the response to the POST to indicate a redirect which Dataverse will use to open the tool. +Signed URLs +^^^^^^^^^^^ -* API Token (deprecated, less secure, not recommended) +The signed URL mechanism is more secure than exposing API tokens and therefore recommended. - - Configured via the ``queryParameters`` by including an ``{apiToken}`` value. When this is present Dataverse will send the user's apiToken to the tool. With the user's API token, the tool can perform any action via the Dataverse API that the user could. External tools configured via this method should be assessed for their trustworthiness. 
- - For tools invoked via GET, this will be done via a query parameter in the request URL which could be cached in the browser's history. Dataverse expects the response to the POST to indicate a redirect which Dataverse will use to open the tool. - - For tools invoked via POST, Dataverse will send a JSON body including the apiToken. +- Configured via the ``allowedApiCalls`` section of the manifest. The tool will be provided with signed URLs allowing the specified access to the given dataset or datafile for the specified amount of time. The tool will not be able to access any other datasets or files the user may have access to and will not be able to make calls other than those specified. +- For tools invoked via a GET call, Dataverse will include a callback query parameter with a Base64 encoded value. The decoded value is a signed URL that can be called to retrieve a JSON response containing all of the queryParameters and allowedApiCalls specified in the manifest. +- For tools invoked via POST, Dataverse will send a JSON body including the requested queryParameters and allowedApiCalls. Dataverse expects the response to the POST to indicate a redirect which Dataverse will use to open the tool. + +API Token +^^^^^^^^^ + +The API token mechanism is deprecated. Because it is less secure than signed URLs, it is not recommended for new external tools. + +- Configured via the ``queryParameters`` by including an ``{apiToken}`` value. When this is present Dataverse will send the user's apiToken to the tool. With the user's API token, the tool can perform any action via the Dataverse API that the user could. External tools configured via this method should be assessed for their trustworthiness. +- For tools invoked via GET, this will be done via a query parameter in the request URL which could be cached in the browser's history. Dataverse expects the response to the POST to indicate a redirect which Dataverse will use to open the tool. +- For tools invoked via POST, Dataverse will send a JSON body including the apiToken. Internationalization of Your External Tool ++++++++++++++++++++++++++++++++++++++++++ @@ -187,6 +195,7 @@ Using Example Manifests to Get Started ++++++++++++++++++++++++++++++++++++++ Again, you can use :download:`fabulousFileTool.json <../_static/installation/files/root/external-tools/fabulousFileTool.json>` or :download:`dynamicDatasetTool.json <../_static/installation/files/root/external-tools/dynamicDatasetTool.json>` as a starting point for your own manifest file. +Additional working examples, including ones using :ref:`signed-urls`, are available at https://github.com/gdcc/dataverse-previewers . Testing Your External Tool -------------------------- diff --git a/doc/sphinx-guides/source/api/getting-started.rst b/doc/sphinx-guides/source/api/getting-started.rst index a6f6c259a25..c12fb01a269 100644 --- a/doc/sphinx-guides/source/api/getting-started.rst +++ b/doc/sphinx-guides/source/api/getting-started.rst @@ -9,7 +9,7 @@ If you are a researcher or curator who wants to automate parts of your workflow, Servers You Can Test With ------------------------- -Rather than using a production Dataverse installation, API users are welcome to use http://demo.dataverse.org for testing. You can email support@dataverse.org if you have any trouble with this server. +Rather than using a production Dataverse installation, API users are welcome to use https://demo.dataverse.org for testing. You can email support@dataverse.org if you have any trouble with this server. 
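+
+For example, you can check that the demo server (or any Dataverse installation) is responding with a couple of unauthenticated calls (an illustrative sketch; substitute your own server URL as needed):
+
+.. code-block:: bash
+
+  # Report the Dataverse software version the server is running
+  curl "https://demo.dataverse.org/api/info/version"
+
+  # Run a simple keyword search
+  curl "https://demo.dataverse.org/api/search?q=data"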
If you would rather have full control over your own test server, deployments to AWS, Docker, and more are covered in the :doc:`/developers/index` and the :doc:`/installation/index`. @@ -86,7 +86,7 @@ See :ref:`create-dataset-command`. Uploading Files ~~~~~~~~~~~~~~~ -See :ref:`add-file-api`. +See :ref:`add-file-api`. In addition, when a Dataverse installation is configured to use S3 storage with direct upload enabled, there is API support to send a file directly to S3. This facilitates an efficient method to upload big files, but is more complex. The procedure is described in the :doc:`/developers/s3-direct-upload-api` section of the Developer Guide. Publishing a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/index.rst b/doc/sphinx-guides/source/api/index.rst index c9e79098546..dd195aa9d62 100755 --- a/doc/sphinx-guides/source/api/index.rst +++ b/doc/sphinx-guides/source/api/index.rst @@ -24,3 +24,4 @@ API Guide linkeddatanotification apps faq + changelog \ No newline at end of file diff --git a/doc/sphinx-guides/source/api/intro.rst b/doc/sphinx-guides/source/api/intro.rst index 933932cd7b9..8eb11798dd7 100755 --- a/doc/sphinx-guides/source/api/intro.rst +++ b/doc/sphinx-guides/source/api/intro.rst @@ -187,6 +187,10 @@ Lists of Dataverse APIs - Files - etc. +- :doc:`/developers/dataset-semantic-metadata-api`: For creating, reading, editing, and deleting dataset metadata using JSON-LD. +- :doc:`/developers/dataset-migration-api`: For migrating datasets from other repositories while retaining the original persistent identifiers and publication date. +- :doc:`/developers/s3-direct-upload-api`: For the transfer of larger files/larger numbers of files directly to an S3 bucket managed by Dataverse. +- :doc:`/developers/globus-api`: For the Globus transfer of larger files/larger numbers of files directly via Globus endpoints managed by Dataverse or referencing files in remote endpoints. - :doc:`metrics`: For query statistics about usage of a Dataverse installation. - :doc:`sword`: For depositing data using a standards-based approach rather than the :doc:`native-api`. @@ -237,7 +241,7 @@ Dataverse Software API questions are on topic in all the usual places: - The dataverse-community Google Group: https://groups.google.com/forum/#!forum/dataverse-community - The Dataverse Project community calls: https://dataverse.org/community-calls -- The Dataverse Project chat room: http://chat.dataverse.org +- The Dataverse Project chat room: https://chat.dataverse.org - The Dataverse Project ticketing system: support@dataverse.org After your question has been answered, you are welcome to help improve the :doc:`faq` section of this guide. diff --git a/doc/sphinx-guides/source/api/metrics.rst b/doc/sphinx-guides/source/api/metrics.rst index 28ac33ea228..14402096650 100755 --- a/doc/sphinx-guides/source/api/metrics.rst +++ b/doc/sphinx-guides/source/api/metrics.rst @@ -1,7 +1,7 @@ Metrics API =========== -The Metrics API provides counts of downloads, datasets created, files uploaded, and more, as described below. The Dataverse Software also includes aggregate counts of Make Data Count metrics (described in the :doc:`/admin/make-data-count` section of the Admin Guide and available per-Dataset through the :doc:`/api/native-api`). A table of all the endpoints is listed below. +The Metrics API provides counts of downloads, datasets created, files uploaded, user accounts created, and more, as described below. 
The Dataverse Software also includes aggregate counts of Make Data Count metrics (described in the :doc:`/admin/make-data-count` section of the Admin Guide and available per-Dataset through the :doc:`/api/native-api`). A table of all the endpoints is listed below. .. contents:: |toctitle| :local: @@ -21,7 +21,7 @@ The Metrics API includes several categories of endpoints that provide different * Form: GET https://$SERVER/api/info/metrics/$type - * where ``$type`` can be set, for example, to ``dataverses`` (Dataverse collections), ``datasets``, ``files`` or ``downloads``. + * where ``$type`` can be set, for example, to ``dataverses`` (Dataverse collections), ``datasets``, ``files``, ``downloads`` or ``accounts``. * Example: ``curl https://demo.dataverse.org/api/info/metrics/downloads`` @@ -31,7 +31,7 @@ The Metrics API includes several categories of endpoints that provide different * Form: GET https://$SERVER/api/info/metrics/$type/toMonth/$YYYY-DD - * where ``$type`` can be set, for example, to ``dataverses`` (Dataverse collections), ``datasets``, ``files`` or ``downloads``. + * where ``$type`` can be set, for example, to ``dataverses`` (Dataverse collections), ``datasets``, ``files``, ``downloads`` or ``accounts``. * Example: ``curl https://demo.dataverse.org/api/info/metrics/dataverses/toMonth/2018-01`` @@ -41,7 +41,7 @@ The Metrics API includes several categories of endpoints that provide different * Form: GET https://$SERVER/api/info/metrics/$type/pastDays/$days - * where ``$type`` can be set, for example, to ``dataverses`` (Dataverse collections), ``datasets``, ``files`` or ``downloads``. + * where ``$type`` can be set, for example, to ``dataverses`` (Dataverse collections), ``datasets``, ``files``, ``downloads`` or ``accounts``. * Example: ``curl https://demo.dataverse.org/api/info/metrics/datasets/pastDays/30`` @@ -51,7 +51,7 @@ The Metrics API includes several categories of endpoints that provide different * Form: GET https://$SERVER/api/info/metrics/$type/monthly - * where ``$type`` can be set, for example, to ``dataverses`` (Dataverse collections), ``datasets``, ``files`` or ``downloads``. + * where ``$type`` can be set, for example, to ``dataverses`` (Dataverse collections), ``datasets``, ``files``, ``downloads`` or ``accounts``. * Example: ``curl https://demo.dataverse.org/api/info/metrics/downloads/monthly`` @@ -163,3 +163,14 @@ The following table lists the available metrics endpoints (not including the Mak /api/info/metrics/uniquefiledownloads/toMonth/{yyyy-MM},"count by id, pid","json, csv",collection subtree,published,y,cumulative up to month specified,unique download counts per file id to the specified month. 
PIDs are also included in output if they exist /api/info/metrics/tree,"id, ownerId, alias, depth, name, children",json,collection subtree,published,y,"tree of dataverses starting at the root or a specified parentAlias with their id, owner id, alias, name, a computed depth, and array of children dataverses","underlying code can also include draft dataverses, this is not currently accessible via api, depth starts at 0" /api/info/metrics/tree/toMonth/{yyyy-MM},"id, ownerId, alias, depth, name, children",json,collection subtree,published,y,"tree of dataverses in existence as of specified date starting at the root or a specified parentAlias with their id, owner id, alias, name, a computed depth, and array of children dataverses","underlying code can also include draft dataverses, this is not currently accessible via api, depth starts at 0" + /api/info/metrics/accounts,count,json,Dataverse installation,all,y,as of now/totals, + /api/info/metrics/accounts/toMonth/{yyyy-MM},count,json,Dataverse installation,all,y,cumulative up to month specified, + /api/info/metrics/accounts/pastDays/{n},count,json,Dataverse installation,all,y,aggregate count for past n days, + /api/info/metrics/accounts/monthly,"date, count","json, csv",Dataverse installation,all,y,monthly cumulative timeseries from first date of first entry to now, + +Related API Endpoints +--------------------- + +The following endpoints are not under the metrics namespace but also return counts: + +- :ref:`file-download-count` diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 4d9466703e4..70d73ae3c98 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -9,7 +9,7 @@ The Dataverse Software exposes most of its GUI functionality via a REST-based AP .. _CORS: https://www.w3.org/TR/cors/ -.. warning:: The Dataverse Software's API is versioned at the URI - all API calls may include the version number like so: ``http://server-address/api/v1/...``. Omitting the ``v1`` part would default to the latest API version (currently 1). When writing scripts/applications that will be used for a long time, make sure to specify the API version, so they don't break when the API is upgraded. +.. warning:: The Dataverse Software's API is versioned at the URI - all API calls may include the version number like so: ``https://server-address/api/v1/...``. Omitting the ``v1`` part would default to the latest API version (currently 1). When writing scripts/applications that will be used for a long time, make sure to specify the API version, so they don't break when the API is upgraded. .. contents:: |toctitle| :local: @@ -88,6 +88,14 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/dataverses/root" +If you want to include the Dataverse collections that this collection is part of, you must set ``returnOwners`` query parameter to ``true``. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/dataverses/root?returnOwners=true" + To view an unpublished Dataverse collection: .. code-block:: bash @@ -503,8 +511,58 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/dataverses/root/metadatablocks/isRoot" -.. 
note:: Previous endpoints ``$SERVER/api/dataverses/$id/metadatablocks/:isRoot`` and ``POST http://$SERVER/api/dataverses/$id/metadatablocks/:isRoot?key=$apiKey`` are deprecated, but supported. +.. note:: Previous endpoints ``$SERVER/api/dataverses/$id/metadatablocks/:isRoot`` and ``POST https://$SERVER/api/dataverses/$id/metadatablocks/:isRoot?key=$apiKey`` are deprecated, but supported. + +.. _get-dataset-json-schema: + +Retrieve a Dataset JSON Schema for a Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset. This +first version of the schema only includes required elements and fields. In the future we plan to improve the schema by adding controlled +vocabulary and more robust dataset field format testing: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/datasetSchema" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/datasetSchema" + +Note: you must have "Add Dataset" permission in the given collection to invoke this endpoint. + +While it is recommended to download a copy of the JSON Schema from the collection (as above) to account for any fields that have been marked as required, you can also download a minimal :download:`dataset-schema.json <../_static/api/dataset-schema.json>` to get a sense of the schema when no customizations have been made. + +.. _validate-dataset-json: + +Validate Dataset JSON File for a Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Validates a dataset JSON file customized for a given collection prior to creating the dataset. The validation only tests for json formatting +and the presence of required elements: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$ID/validateDatasetJson" -H 'Content-type:application/json' --upload-file dataset.json + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/dataverses/root/validateDatasetJson" -H 'Content-type:application/json' --upload-file dataset.json +Note: you must have "Add Dataset" permission in the given collection to invoke this endpoint. .. _create-dataset-command: @@ -525,10 +583,16 @@ Submit Incomplete Dataset ^^^^^^^^^^^^^^^^^^^^^^^^^ **Note:** This feature requires :ref:`dataverse.api.allow-incomplete-metadata` to be enabled and your Solr -Schema to be up-to-date with the ``datasetValid`` field. +Schema to be up-to-date with the ``datasetValid`` field. If not done yet with the version upgrade, you will +also need to reindex all dataset after enabling the :ref:`dataverse.api.allow-incomplete-metadata` feature. Providing a ``.../datasets?doNotValidate=true`` query parameter turns off the validation of metadata. -In this case, only the "Author Name" is required. 
For example, a minimal JSON file would look like this: +In this situation, only the "Author Name" is required, except for the case when the setting :ref:`:MetadataLanguages` +is configured and the value of "Dataset Metadata Language" setting of a collection is left with the default +"Chosen at Dataset Creation" value. In that case, a language that is a part of the :ref:`:MetadataLanguages` list must be +declared in the incomplete dataset. + +For example, a minimal JSON file, without the language specification, would look like this: .. code-block:: json :name: dataset-incomplete.json @@ -748,13 +812,53 @@ The following attributes are supported: * ``affiliation`` Affiliation * ``filePIDsEnabled`` ("true" or "false") Restricted to use by superusers and only when the :ref:`:AllowEnablingFilePIDsPerCollection <:AllowEnablingFilePIDsPerCollection>` setting is true. Enables or disables registration of file-level PIDs in datasets within the collection (overriding the instance-wide setting). +.. _collection-storage-quotas: + +Collection Storage Quotas +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota" + +Will output the storage quota allocated (in bytes), or a message indicating that the quota is not defined for the specific collection. The user identified by the API token must have the ``Manage`` permission on the collection. + + +To set or change the storage allocation quota for a collection: + +.. code-block:: + + curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota/$SIZE_IN_BYTES" + +This is API is superuser-only. + + +To delete a storage quota configured for a collection: + +.. code-block:: + + curl -X DELETE -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota" + +This is API is superuser-only. + +Use the ``/settings`` API to enable or disable the enforcement of storage quotas that are defined across the instance via the following setting. For example, + +.. code-block:: + + curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:UseStorageQuotas + Datasets -------- **Note** Creation of new datasets is done with a ``POST`` onto a Dataverse collection. See the Dataverse Collections section above. -**Note** In all commands below, dataset versions can be referred to as: +.. _dataset-version-specifiers: + +Dataset Version Specifiers +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In all commands below, dataset versions can be referred to as: * ``:draft`` the draft version, if any * ``:latest`` either a draft (if exists) or the latest published version. @@ -789,7 +893,7 @@ Getting its draft version: export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB - curl -H "X-Dataverse-key:$API_TOKEN" "http://$SERVER/api/datasets/:persistentId/versions/:draft?persistentId=$PERSISTENT_IDENTIFIER" + curl -H "X-Dataverse-key:$API_TOKEN" "https://$SERVER/api/datasets/:persistentId/versions/:draft?persistentId=$PERSISTENT_IDENTIFIER" The fully expanded example above (without environment variables) looks like this: @@ -814,6 +918,14 @@ The fully expanded example above (without environment variables) looks like this The dataset id can be extracted from the response retrieved from the API which uses the persistent identifier (``/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER``). +If you want to include the Dataverse collections that this dataset is part of, you must set ``returnOwners`` query parameter to ``true``. 
+ +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24?returnOwners=true" + List Versions of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -883,6 +995,10 @@ It returns a list of versions with their metadata, and file list: ] } +The optional ``includeFiles`` parameter specifies whether the files should be listed in the output. It defaults to ``true``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or files having the files included can dramatically increase the volume of the output). A separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. + +The optional ``offset`` and ``limit`` parameters can be used to specify the range of the versions list to be shown. This can be used to paginate through the list in a dataset with a large number of versions. + Get Version of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -895,13 +1011,34 @@ Get Version of a Dataset export ID=24 export VERSION=1.0 - curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION" + curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION?includeFiles=false" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl "https://demo.dataverse.org/api/datasets/24/versions/1.0" + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0?includeFiles=false" + +The optional ``includeFiles`` parameter specifies whether the files should be listed in the output (defaults to ``true``). Note that a separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. + + +By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below. + +If you want to include deaccessioned dataset versions, you must set ``includeDeaccessioned`` query parameter to ``true``. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0?includeDeaccessioned=true" + +If you want to include the Dataverse collections that this dataset version is part of, you must set ``returnOwners`` query parameter to ``true``. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0?returnOwners=true" .. _export-dataset-metadata-api: @@ -958,6 +1095,180 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files" +This endpoint supports optional pagination, through the ``limit`` and ``offset`` query parameters. + +To aid in pagination the JSON response also includes the total number of rows (totalCount) available. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?limit=10&offset=20" + +Category name filtering is also optionally supported. To return files to which the requested category has been added. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?categoryName=Data" + +Tabular tag name filtering is also optionally supported. To return files to which the requested tabular tag has been added. + +Usage example: + +.. 
code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?tabularTagName=Survey" + +Content type filtering is also optionally supported. To return files matching the requested content type. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?contentType=image/png" + +Filtering by search text is also optionally supported. The search will be applied to the labels and descriptions of the dataset files, to return the files that contain the text searched in one of such fields. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?searchText=word" + +File access filtering is also optionally supported. In particular, by the following possible values: + +* ``Public`` +* ``Restricted`` +* ``EmbargoedThenRestricted`` +* ``EmbargoedThenPublic`` + +If no filter is specified, the files will match all of the above categories. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?accessStatus=Public" + +Ordering criteria for sorting the results is also optionally supported. In particular, by the following possible values: + +* ``NameAZ`` (Default) +* ``NameZA`` +* ``Newest`` +* ``Oldest`` +* ``Size`` +* ``Type`` + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?orderCriteria=Newest" + +Please note that both filtering and ordering criteria values are case sensitive and must be correctly typed for the endpoint to recognize them. + +By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below. + +If you want to include deaccessioned dataset versions, you must set ``includeDeaccessioned`` query parameter to ``true``. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files?includeDeaccessioned=true" + +.. note:: Keep in mind that you can combine all of the above query parameters depending on the results you are looking for. + +Get File Counts in a Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Get file counts, for the given dataset and version. + +The returned file counts are based on different criteria: + +- Total (The total file count) +- Per content type +- Per category name +- Per tabular tag name +- Per access status (Possible values: Public, Restricted, EmbargoedThenRestricted, EmbargoedThenPublic) + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export ID=24 + export VERSION=1.0 + + curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION/files/counts" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files/counts" + +Category name filtering is optionally supported. To return counts only for files to which the requested category has been added. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files/counts?categoryName=Data" + +Tabular tag name filtering is also optionally supported. To return counts only for files to which the requested tabular tag has been added. + +Usage example: + +.. 
code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files/counts?tabularTagName=Survey" + +Content type filtering is also optionally supported. To return counts only for files matching the requested content type. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files/counts?contentType=image/png" + +Filtering by search text is also optionally supported. The search will be applied to the labels and descriptions of the dataset files, to return counts only for files that contain the text searched in one of such fields. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files/counts?searchText=word" + +File access filtering is also optionally supported. In particular, by the following possible values: + +* ``Public`` +* ``Restricted`` +* ``EmbargoedThenRestricted`` +* ``EmbargoedThenPublic`` + +If no filter is specified, the files will match all of the above categories. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files/counts?accessStatus=Public" + +By default, deaccessioned dataset versions are not supported by this endpoint and will be ignored in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a not found error if the version is deaccessioned and you do not enable the option described below. + +If you want to include deaccessioned dataset versions, you must specify this through the ``includeDeaccessioned`` query parameter. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files/counts?includeDeaccessioned=true" + +Please note that filtering values are case sensitive and must be correctly typed for the endpoint to recognize them. + +Keep in mind that you can combine all of the above query parameters depending on the results you are looking for. + View Dataset Files and Folders as a Directory Index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1254,11 +1565,44 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/datasets/24/versions/:draft" +Deaccession Dataset +~~~~~~~~~~~~~~~~~~~ + +Given a version of a dataset, updates its status to deaccessioned. + +The JSON body required to deaccession a dataset (``deaccession.json``) looks like this:: + + { + "deaccessionReason": "Description of the deaccession reason.", + "deaccessionForwardURL": "https://demo.dataverse.org" + } + + +Note that the field ``deaccessionForwardURL`` is optional. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + export VERSIONID=1.0 + export FILE_PATH=deaccession.json + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/datasets/$ID/versions/$VERSIONID/deaccession" -H "Content-type:application/json" --upload-file $FILE_PATH + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/datasets/24/versions/1.0/deaccession" -H "Content-type:application/json" --upload-file deaccession.json + +.. note:: You cannot deaccession a dataset more than once. 
If you call this endpoint twice for the same dataset version, you will get a not found error on the second call, since the dataset you are looking for will no longer be published since it is already deaccessioned. + Set Citation Date Field Type for a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sets the dataset citation date field type for a given dataset. ``:publicationDate`` is the default. -Note that the dataset citation date field type must be a date field. +Sets the dataset citation date field type for a given dataset. ``:publicationDate`` is the default. +Note that the dataset citation date field type must be a date field. This change applies to all versions of the dataset that have an entry for the new date field. It also applies to all file citations in the dataset. .. code-block:: bash @@ -1667,6 +2011,73 @@ The fully expanded example above (without environment variables) looks like this The size of all files available for download will be returned. If :draft is passed as versionId the token supplied must have permission to view unpublished drafts. A token is not required for published datasets. Also restricted files will be included in this total regardless of whether the user has access to download the restricted file(s). +There is an optional query parameter ``mode`` which applies a filter criteria to the operation. This parameter supports the following values: + +* ``All`` (Default): Includes both archival and original sizes for tabular files +* ``Archival``: Includes only the archival size for tabular files +* ``Original``: Includes only the original size for tabular files + +Usage example: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?mode=Archival" + +Category name filtering is also optionally supported. To return the size of all files available for download matching the requested category name. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?categoryName=Data" + +Tabular tag name filtering is also optionally supported. To return the size of all files available for download for which the requested tabular tag has been added. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?tabularTagName=Survey" + +Content type filtering is also optionally supported. To return the size of all files available for download matching the requested content type. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?contentType=image/png" + +Filtering by search text is also optionally supported. The search will be applied to the labels and descriptions of the dataset files, to return the size of all files available for download that contain the text searched in one of such fields. + +Usage example: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?searchText=word" + +File access filtering is also optionally supported. In particular, by the following possible values: + +* ``Public`` +* ``Restricted`` +* ``EmbargoedThenRestricted`` +* ``EmbargoedThenPublic`` + +If no filter is specified, the files will match all of the above categories. + +Please note that filtering query parameters are case sensitive and must be correctly typed for the endpoint to recognize them. 
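As an illustration, several of these filters can be combined in a single request. The values below (mode, content type, access status, and search text) are arbitrary examples; adjust them to your own files:

.. code-block:: bash

  # Hypothetical combination: archival size of public PNG files whose label or description mentions "survey"
  curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?mode=Archival&contentType=image/png&accessStatus=Public&searchText=survey"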
+
+By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below.
+
+If you want to include deaccessioned dataset versions, you must set ``includeDeaccessioned`` query parameter to ``true``.
+
+Usage example:
+
+.. code-block:: bash
+
+  curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/downloadsize?includeDeaccessioned=true"
+
+.. note:: Keep in mind that you can combine all of the above query parameters depending on the results you are looking for.
+
Submit a Dataset for Review
~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -1715,7 +2126,8 @@ The fully expanded example above (without environment variables) looks like this

  curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/datasets/:persistentId/returnToAuthor?persistentId=doi:10.5072/FK2/J8SJZB" -H "Content-type: application/json" -d @reason-for-return.json

-The review process can sometimes resemble a tennis match, with the authors submitting and resubmitting the dataset over and over until the curators are satisfied. Each time the curators send a "reason for return" via API, that reason is persisted into the database, stored at the dataset version level.
+The review process can sometimes resemble a tennis match, with the authors submitting and resubmitting the dataset over and over until the curators are satisfied. Each time the curators send a "reason for return" via API, that reason is sent by email and is persisted into the database, stored at the dataset version level.
+The reason is required, but it can be any comment that suits your situation, such as "The author would like to modify his dataset", "Files are missing", "Nothing to report" or "A curation report with comments and suggestions/instructions will follow in another email".

The :ref:`send-feedback` API call may be useful as a way to move the conversation to email. However, note that these emails go to contacts (versus authors) and there is no database record of the email contents. (:ref:`dataverse.mail.cc-support-on-contact-email` will send a copy of these emails to the support email address which would provide a record.)

@@ -2088,10 +2500,12 @@ The API call requires a Json body that includes the list of the fileIds that the

  curl -H "X-Dataverse-key: $API_TOKEN" -H "Content-Type:application/json" "$SERVER_URL/api/datasets/:persistentId/files/actions/:unset-embargo?persistentId=$PERSISTENT_IDENTIFIER" -d "$JSON"

+.. _Archival Status API:
+
Get the Archival Status of a Dataset By Version
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-Archiving is an optional feature that may be configured for a Dataverse installation. When that is enabled, this API call be used to retrieve the status. Note that this requires "superuser" credentials.
+Archival :ref:`BagIt Export` is an optional feature that may be configured for a Dataverse installation. When that is enabled, this API call can be used to retrieve the status. Note that this requires "superuser" credentials.

``GET /api/datasets/$dataset-id/$version/archivalStatus`` returns the archival status of the specified dataset version.
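A minimal sketch of such a call, assuming a superuser API token and the usual example dataset id of 24 and version 1.0:

.. code-block:: bash

  export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
  export SERVER_URL=https://demo.dataverse.org

  # Superuser credentials are required
  curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/24/1.0/archivalStatus"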
@@ -2171,11 +2585,11 @@ Signposting involves the addition of a `Link ;rel="cite-as", ;rel="describedby";type="application/vnd.citationstyles.csl+json",;rel="describedby";type="application/json+ld", ;rel="type",;rel="type", https://demo.dataverse.org/api/datasets/:persistentId/versions/1.0/customlicense?persistentId=doi:10.5072/FK2/YD5QDG;rel="license", ; rel="linkset";type="application/linkset+json"``
+``Link: ;rel="cite-as", ;rel="describedby";type="application/vnd.citationstyles.csl+json",;rel="describedby";type="application/ld+json", ;rel="type",;rel="type", ;rel="license", ; rel="linkset";type="application/linkset+json"``

The URL for linkset information is discoverable under the ``rel="linkset";type="application/linkset+json`` entry in the "Link" header, such as in the example above.

-The reponse includes a JSON object conforming to the `Signposting `__ specification.
+The response includes a JSON object conforming to the `Signposting `__ specification. As part of this conformance, unlike most Dataverse API responses, the output is not wrapped in a ``{"status":"OK","data":{`` object. Signposting is not supported for draft dataset versions.

.. code-block:: bash

@@ -2196,6 +2610,17 @@ Get Dataset By Private URL Token

  curl "$SERVER_URL/api/datasets/privateUrlDatasetVersion/$PRIVATE_URL_TOKEN"

+If you want to include the Dataverse collections that this dataset is part of, you must set ``returnOwners`` query parameter to ``true``.
+
+Usage example:
+
+.. code-block:: bash
+
+  curl "https://demo.dataverse.org/api/datasets/privateUrlDatasetVersion/a56444bc-7697-4711-8964-e0577f055fd2?returnOwners=true"
+
+
+.. _get-citation:
+
Get Citation
~~~~~~~~~~~~

@@ -2207,10 +2632,20 @@ Get Citation

  curl -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/{version}/citation?persistentId=$PERSISTENT_IDENTIFIER"

-Get Citation by Private URL Token
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below.

-.. code-block:: bash
+If you want to include deaccessioned dataset versions, you must set ``includeDeaccessioned`` query parameter to ``true``.
+
+Usage example:
+
+.. code-block:: bash
+
+  curl -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/{version}/citation?persistentId=$PERSISTENT_IDENTIFIER&includeDeaccessioned=true"
+
+Get Citation by Private URL Token
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: bash

  export SERVER_URL=https://demo.dataverse.org
  export PRIVATE_URL_TOKEN=a56444bc-7697-4711-8964-e0577f055fd2

@@ -2230,13 +2665,150 @@ See :ref:`:CustomDatasetSummaryFields` in the Installation Guide for how the lis

  curl "$SERVER_URL/api/datasets/summaryFieldNames"

+.. _guestbook-at-request-api:
+
+Configure When a Dataset Guestbook Appears (If Enabled)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By default, users are asked to fill out a configured Guestbook when they download files from a dataset. If enabled for a given Dataverse instance (see XYZ), users may instead be asked to fill out a Guestbook only when they request access to restricted files.
+This is configured by a global default, collection-level settings, or directly at the dataset level via these API calls (superuser access is required to make changes). + +To see the current choice for this dataset: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/YD5QDG + + curl "$SERVER_URL/api/datasets/:persistentId/guestbookEntryAtRequest?persistentId=$PERSISTENT_IDENTIFIER" + + + The response will be true (guestbook displays when making a request), false (guestbook displays at download), or will indicate that the dataset inherits one of these settings. + +To set the behavior for this dataset: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/YD5QDG + + curl -X PUT -H "X-Dataverse-key:$API_TOKEN" -H Content-type:application/json -d true "$SERVER_URL/api/datasets/:persistentId/guestbookEntryAtRequest?persistentId=$PERSISTENT_IDENTIFIER" + + + This example uses true to set the behavior to guestbook at request. Note that this call will return a 403/Forbidden response if guestbook at request functionality is not enabled for this Dataverse instance. + +The API can also be used to reset the dataset to use the default/inherited value: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/YD5QDG + + curl -X DELETE -H "X-Dataverse-key:$API_TOKEN" -H Content-type:application/json "$SERVER_URL/api/datasets/:persistentId/guestbookEntryAtRequest?persistentId=$PERSISTENT_IDENTIFIER" + +Get User Permissions on a Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API call returns the permissions that the calling user has on a particular dataset. + +In particular, the user permissions that this API call checks, returned as booleans, are the following: + +* Can view the unpublished dataset +* Can edit the dataset +* Can publish the dataset +* Can manage the dataset permissions +* Can delete the dataset draft + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + + curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/$ID/userPermissions" + +Know If a User Can Download at Least One File from a Dataset Version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API endpoint indicates if the calling user can download at least one file from a dataset version. Note that permissions based on :ref:`shib-groups` are not considered. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export ID=24 + export VERSION=1.0 + + curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/$ID/versions/$VERSION/canDownloadAtLeastOneFile" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/24/versions/1.0/canDownloadAtLeastOneFile" + +.. _dataset-pid-generator: + +Configure The PID Generator a Dataset Uses (If Enabled) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Dataverse can be configured to use multiple PID Providers (see the :ref:`pids-configuration` section for more information). 
+When there are multiple PID Providers and File PIDs are enabled, it is possible to set which provider will be used to generate (mint) those PIDs. +While it usually makes sense to use the same PID Provider that manages the dataset PID, there are cases, specifically if the PID Provider for the dataset PID cannot generate +other PIDs with the same authority/shoulder, etc. as in the dataset PID, where another Provider is needed. Dataverse has a set of API calls to see what PID provider will be +used to generate datafile PIDs and, as a superuser, to change it (to a new one or back to a default). + +To see the current choice for this dataset: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/YD5QDG + + curl "$SERVER_URL/api/datasets/:persistentId/pidGenerator?persistentId=$PERSISTENT_IDENTIFIER" + +The response will be the id of the PID Provider that will be used. Details of that provider's configration can be obtained via the :ref:`pids-providers-api`. + +To set the behavior for this dataset: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/YD5QDG + export GENERATOR_ID=perma1 + + curl -X PUT -H "X-Dataverse-key:$API_TOKEN" -H Content-type:application/json -d $GENERATOR_ID "$SERVER_URL/api/datasets/:persistentId/pidGenerator?persistentId=$PERSISTENT_IDENTIFIER" + + +The PID Provider id used must be one of the those configured - see :ref:`pids-providers-api`. +The return status code may be 200/OK, 401/403 if an api key is not sent or the user is not a superuser, or 404 if the dataset or PID provider are not found. +Note that using a PIDProvider that generates DEPENDENT datafile PIDs that doesn't share the dataset PID's protocol/authority/separator/shoulder is not supported. (INDEPENDENT should be used in this case see the :ref:`pids-configuration` section for more information). + +The API can also be used to reset the dataset to use the default/inherited value: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/YD5QDG + + curl -X DELETE -H "X-Dataverse-key:$API_TOKEN" -H Content-type:application/json "$SERVER_URL/api/datasets/:persistentId/pidGenerator?persistentId=$PERSISTENT_IDENTIFIER" + +The default will always be the same provider as for the dataset PID if that provider can generate new PIDs, and will be the PID Provider set for the collection or the global default otherwise. + Files ----- +.. _get-json-rep-of-file: + Get JSON Representation of a File ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. note:: Files can be accessed using persistent identifiers. This is done by passing the constant ``:persistentId`` where the numeric id of the file is expected, and then passing the actual persistent id as a query parameter with the name ``persistentId``. +.. note:: When a file has been assigned a persistent identifier, it can be used in the API. This is done by passing the constant ``:persistentId`` where the numeric id of the file is expected, and then passing the actual persistent id as a query parameter with the name ``persistentId``. + +This endpoint returns the file metadata present in the latest dataset version. 
Example: Getting the file whose DOI is *10.5072/FK2/J8SJZB*: @@ -2304,6 +2876,127 @@ The fully expanded example above (without environment variables) looks like this The file id can be extracted from the response retrieved from the API which uses the persistent identifier (``/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER``). +By default, files from deaccessioned dataset versions are not included in the search. If no accessible dataset draft version exists, the search of the latest published file will ignore dataset deaccessioned versions unless ``includeDeaccessioned`` query parameter is set to ``true``. + +Usage example: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER&includeDeaccessioned=true" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB&includeDeaccessioned=true" + +If you want to include the dataset version of the file in the response, there is an optional parameter for this called ``returnDatasetVersion`` whose default value is ``false``. + +Usage example: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER&returnDatasetVersion=true" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB&returnDatasetVersion=true" + +Get JSON Representation of a File given a Dataset Version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: When a file has been assigned a persistent identifier, it can be used in the API. This is done by passing the constant ``:persistentId`` where the numeric id of the file is expected, and then passing the actual persistent id as a query parameter with the name ``persistentId``. + +This endpoint returns the file metadata present in the requested dataset version. To specify the dataset version, you can use ``:latest-published``, or ``:latest``, or ``:draft`` or ``1.0`` or any other style listed under :ref:`dataset-version-specifiers`. + +Example: Getting the file whose DOI is *10.5072/FK2/J8SJZB* present in the published dataset version ``1.0``: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export DATASET_VERSION=1.0 + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/versions/$DATASET_VERSION?persistentId=$PERSISTENT_IDENTIFIER" + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/versions/1.0?persistentId=doi:10.5072/FK2/J8SJZB" + +You may obtain a not found error depending on whether or not the specified version exists or you have permission to view it. + +By default, files from deaccessioned dataset versions are not included in the search unless ``includeDeaccessioned`` query parameter is set to ``true``. + +Usage example: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export DATASET_VERSION=:latest-published + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/versions/$DATASET_VERSION?persistentId=$PERSISTENT_IDENTIFIER&includeDeaccessioned=true" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/versions/:latest-published?persistentId=doi:10.5072/FK2/J8SJZB&includeDeaccessioned=true" + +If you want to include the dataset version of the file in the response, there is an optional parameter for this called ``returnDatasetVersion`` whose default value is ``false``. + +Usage example: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export DATASET_VERSION=:draft + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/versions/$DATASET_VERSION?persistentId=$PERSISTENT_IDENTIFIER&returnDatasetVersion=true" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB&returnDatasetVersion=true" + +If you want to include the dataset and collections that the file is part of in the response, there is an optional parameter for this called ``returnOwners`` whose default value is ``false``. + +Usage example: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export DATASET_VERSION=:draft + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/versions/$DATASET_VERSION?persistentId=$PERSISTENT_IDENTIFIER&returnOwners=true" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB&returnOwners=true" + + + Adding Files ~~~~~~~~~~~~ @@ -2375,10 +3068,15 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT -d true "https://demo.dataverse.org/api/files/:persistentId/restrict?persistentId=doi:10.5072/FK2/AAA000" +.. _file-uningest: + Uningest a File ~~~~~~~~~~~~~~~ -Reverse the tabular data ingest process performed on a file where ``ID`` is the database id or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file to process. 
Note that this requires "superuser" credentials. +Reverse the tabular data ingest process performed on a file where ``ID`` is the database id or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file to process. + +Note that this requires "superuser" credentials to undo a successful ingest and remove the variable-level metadata and .tab version of the file. +It can also be used by a user who can publish the dataset to clear the error from an unsuccessful ingest. A curl example using an ``ID``: @@ -2412,12 +3110,229 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/uningest?persistentId=doi:10.5072/FK2/AAA000" +.. _file-reingest: + Reingest a File ~~~~~~~~~~~~~~~ -Attempt to ingest an existing datafile as tabular data. This API can be used on a file that was not ingested as tabular back when it was uploaded. For example, a Stata v.14 file that was uploaded before ingest support for Stata 14 was added (in Dataverse Software v.4.9). It can also be used on a file that failed to ingest due to a bug in the ingest plugin that has since been fixed (hence the name "reingest"). +Attempt to ingest an existing datafile as tabular data. This API can be used on a file that was not ingested as tabular back when it was uploaded. For example, a Stata v.14 file that was uploaded before ingest support for Stata 14 was added (in Dataverse Software v.4.9). It can also be used on a file that failed to ingest due to a bug in the ingest plugin that has since been fixed (hence the name "reingest"). + +Note that this requires "superuser" credentials. + +A curl example using an ``ID`` + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/reingest" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/reingest" + +A curl example using a ``PERSISTENT_ID`` + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/reingest?persistentId=$PERSISTENT_ID" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/reingest?persistentId=doi:10.5072/FK2/AAA000" + +Note: at present, the API cannot be used on a file that's already successfully ingested as tabular. + +.. _redetect-file-type: + +Redetect File Type +~~~~~~~~~~~~~~~~~~ + +The Dataverse Software uses a variety of methods for determining file types (MIME types or content types) and these methods (listed below) are updated periodically. If you have files that have an unknown file type, you can have the Dataverse Software attempt to redetect the file type. + +When using the curl command below, you can pass ``dryRun=true`` if you don't want any changes to be saved to the database. Change this to ``dryRun=false`` (or omit it) to save the change. + +A curl example using an ``id`` + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/redetect?dryRun=true" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/redetect?dryRun=true" + +A curl example using a ``pid`` + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/redetect?persistentId=$PERSISTENT_ID&dryRun=true" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/redetect?persistentId=doi:10.5072/FK2/AAA000&dryRun=true" + +Currently the following methods are used to detect file types: + +- The file type detected by the browser (or sent via API). +- JHOVE: https://jhove.openpreservation.org +- The file extension (e.g. ".ipybn") is used, defined in a file called ``MimeTypeDetectionByFileExtension.properties``. +- The file name (e.g. "Dockerfile") is used, defined in a file called ``MimeTypeDetectionByFileName.properties``. + +.. _extractNcml: + +Extract NcML +~~~~~~~~~~~~ + +As explained in the :ref:`netcdf-and-hdf5` section of the User Guide, when those file types are uploaded, an attempt is made to extract an NcML file from them and store it as an auxiliary file. + +This happens automatically but superusers can also manually trigger this NcML extraction process with the API endpoint below. + +Note that "true" will be returned if an NcML file was created. "false" will be returned if there was an error or if the NcML file already exists (check server.log for details). + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/extractNcml" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/extractNcml" + +A curl example using a PID: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/extractNcml?persistentId=$PERSISTENT_ID" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/extractNcml?persistentId=doi:10.5072/FK2/AAA000" + +Replacing Files +~~~~~~~~~~~~~~~ + +Replace an existing file where ``ID`` is the database id of the file to replace or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. Requires the ``file`` to be passed as well as a ``jsonString`` expressing the new metadata. 
Note that metadata such as description, directoryLabel (File Path) and tags are not carried over from the file being replaced. + +Note that when a Dataverse installation is configured to use S3 storage with direct upload enabled, there is API support to send a replacement file directly to S3. This is more complex and is described in the :doc:`/developers/s3-direct-upload-api` guide. + +A curl example using an ``ID`` + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST -F 'file=@file.extension' -F 'jsonData={json}' "$SERVER_URL/api/files/$ID/replace" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -F 'file=@data.tsv' \ + -F 'jsonData={"description":"My description.","categories":["Data"],"forceReplace":false}' \ + "https://demo.dataverse.org/api/files/24/replace" + +A curl example using a ``PERSISTENT_ID`` + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST -F 'file=@file.extension' -F 'jsonData={json}' \ + "$SERVER_URL/api/files/:persistentId/replace?persistentId=$PERSISTENT_ID" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -F 'file=@data.tsv' \ + -F 'jsonData={"description":"My description.","categories":["Data"],"forceReplace":false}' \ + "https://demo.dataverse.org/api/files/:persistentId/replace?persistentId=doi:10.5072/FK2/AAA000" + +Deleting Files +~~~~~~~~~~~~~~ + +Delete an existing file where ``ID`` is the database id of the file to delete or ``PERSISTENT_ID`` is the persistent id (DOI or Handle, if it exists) of the file. + +Note that the behavior of deleting files depends on whether the dataset has ever been published. + +- If the dataset has never been published, the file will be deleted forever. +- If the dataset has been published, the file is deleted from the draft (and future published versions). +- If the dataset has been published, the deleted file can still be downloaded because it was part of a published version. + +A curl example using an ``ID`` + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/files/$ID" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/files/24" + +A curl example using a ``PERSISTENT_ID`` + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/files/:persistentId?persistentId=$PERSISTENT_ID" + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/files/:persistentId?persistentId=doi:10.5072/FK2/AAA000" -Note that this requires "superuser" credentials. +Getting File Metadata +~~~~~~~~~~~~~~~~~~~~~ + +Provides a json representation of the file metadata for an existing file where ``ID`` is the database id of the file to get metadata from or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. A curl example using an ``ID`` @@ -2427,13 +3342,13 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/reingest" + curl "$SERVER_URL/api/files/$ID/metadata" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/reingest" + curl "https://demo.dataverse.org/api/files/24/metadata" A curl example using a ``PERSISTENT_ID`` @@ -2443,26 +3358,17 @@ A curl example using a ``PERSISTENT_ID`` export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/reingest?persistentId=$PERSISTENT_ID" + curl "$SERVER_URL/api/files/:persistentId/metadata?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/reingest?persistentId=doi:10.5072/FK2/AAA000" - -Note: at present, the API cannot be used on a file that's already successfully ingested as tabular. - -.. _redetect-file-type: - -Redetect File Type -~~~~~~~~~~~~~~~~~~ - -The Dataverse Software uses a variety of methods for determining file types (MIME types or content types) and these methods (listed below) are updated periodically. If you have files that have an unknown file type, you can have the Dataverse Software attempt to redetect the file type. + curl "https://demo.dataverse.org/api/files/:persistentId/metadata?persistentId=doi:10.5072/FK2/AAA000" -When using the curl command below, you can pass ``dryRun=true`` if you don't want any changes to be saved to the database. Change this to ``dryRun=false`` (or omit it) to save the change. +The current draft can also be viewed if you have permissions and pass your API token -A curl example using an ``id`` +A curl example using an ``ID`` .. code-block:: bash @@ -2470,15 +3376,15 @@ A curl example using an ``id`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/redetect?dryRun=true" + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/$ID/metadata/draft" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/redetect?dryRun=true" + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/24/metadata/draft" -A curl example using a ``pid`` +A curl example using a ``PERSISTENT_ID`` .. 
code-block:: bash @@ -2486,31 +3392,22 @@ A curl example using a ``pid`` export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/redetect?persistentId=$PERSISTENT_ID&dryRun=true" + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/metadata/draft?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/redetect?persistentId=doi:10.5072/FK2/AAA000&dryRun=true" - -Currently the following methods are used to detect file types: - -- The file type detected by the browser (or sent via API). -- JHOVE: http://jhove.openpreservation.org -- The file extension (e.g. ".ipybn") is used, defined in a file called ``MimeTypeDetectionByFileExtension.properties``. -- The file name (e.g. "Dockerfile") is used, defined in a file called ``MimeTypeDetectionByFileName.properties``. - -.. _extractNcml: + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/metadata/draft?persistentId=doi:10.5072/FK2/AAA000" -Extract NcML -~~~~~~~~~~~~ +Note: The ``id`` returned in the json response is the id of the file metadata version. -As explained in the :ref:`netcdf-and-hdf5` section of the User Guide, when those file types are uploaded, an attempt is made to extract an NcML file from them and store it as an auxiliary file. +Getting File Data Tables +~~~~~~~~~~~~~~~~~~~~~~~~ -This happens automatically but superusers can also manually trigger this NcML extraction process with the API endpoint below. +This endpoint is oriented toward tabular files and provides a JSON representation of the file data tables for an existing tabular file. ``ID`` is the database id of the file to get the data tables from or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. -Note that "true" will be returned if an NcML file was created. "false" will be returned if there was an error or if the NcML file already exists (check server.log for details). +A curl example using an ``ID`` .. code-block:: bash @@ -2518,15 +3415,15 @@ Note that "true" will be returned if an NcML file was created. "false" will be r export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/extractNcml" + curl $SERVER_URL/api/files/$ID/dataTables The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/extractNcml" + curl https://demo.dataverse.org/api/files/24/dataTables -A curl example using a PID: +A curl example using a ``PERSISTENT_ID`` .. code-block:: bash @@ -2534,20 +3431,22 @@ A curl example using a PID: export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/extractNcml?persistentId=$PERSISTENT_ID" + curl "$SERVER_URL/api/files/:persistentId/dataTables?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/extractNcml?persistentId=doi:10.5072/FK2/AAA000" + curl "https://demo.dataverse.org/api/files/:persistentId/dataTables?persistentId=doi:10.5072/FK2/AAA000" -Replacing Files -~~~~~~~~~~~~~~~ +Note that if the requested file is not tabular, the endpoint will return an error. -Replace an existing file where ``ID`` is the database id of the file to replace or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. Requires the ``file`` to be passed as well as a ``jsonString`` expressing the new metadata. Note that metadata such as description, directoryLabel (File Path) and tags are not carried over from the file being replaced. +.. _file-download-count: -Note that when a Dataverse installation is configured to use S3 storage with direct upload enabled, there is API support to send a replacement file directly to S3. This is more complex and is described in the :doc:`/developers/s3-direct-upload-api` guide. +Getting File Download Count +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Provides the download count for a particular file, where ``ID`` is the database id of the file to get the download count from or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. A curl example using an ``ID`` @@ -2557,15 +3456,13 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST -F 'file=@file.extension' -F 'jsonData={json}' "$SERVER_URL/api/files/$ID/replace" + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "$SERVER_URL/api/files/$ID/downloadCount" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -F 'file=@data.tsv' \ - -F 'jsonData={"description":"My description.","categories":["Data"],"forceReplace":false}' \ - "https://demo.dataverse.org/api/files/24/replace" + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X GET "https://demo.dataverse.org/api/files/24/downloadCount" A curl example using a ``PERSISTENT_ID`` @@ -2575,27 +3472,20 @@ A curl example using a ``PERSISTENT_ID`` export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl -H "X-Dataverse-key:$API_TOKEN" -X POST -F 'file=@file.extension' -F 'jsonData={json}' \ - "$SERVER_URL/api/files/:persistentId/replace?persistentId=$PERSISTENT_ID" + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "$SERVER_URL/api/files/:persistentId/downloadCount?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -F 'file=@data.tsv' \ - -F 'jsonData={"description":"My description.","categories":["Data"],"forceReplace":false}' \ - "https://demo.dataverse.org/api/files/:persistentId/replace?persistentId=doi:10.5072/FK2/AAA000" - -Deleting Files -~~~~~~~~~~~~~~ + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X GET "https://demo.dataverse.org/api/files/:persistentId/downloadCount?persistentId=doi:10.5072/FK2/AAA000" -Delete an existing file where ``ID`` is the database id of the file to delete or ``PERSISTENT_ID`` is the persistent id (DOI or Handle, if it exists) of the file. +If you are interested in download counts for multiple files, see :doc:`/api/metrics`. 
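+ +If you only need counts for a few specific files, a small shell loop over this endpoint can be used as well (a minimal sketch; the file database ids ``24``, ``25`` and ``26`` are placeholders for illustration): + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + + # Print the raw JSON response of the download count endpoint for each file id + for ID in 24 25 26; do + curl -s -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/$ID/downloadCount" + echo + done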
-Note that the behavior of deleting files depends on if the dataset has ever been published or not. +File Has Been Deleted +~~~~~~~~~~~~~~~~~~~~~ -- If the dataset has never been published, the file will be deleted forever. -- If the dataset has published, the file is deleted from the draft (and future published versions). -- If the dataset has published, the deleted file can still be downloaded because it was part of a published version. +Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version. A curl example using an ``ID`` @@ -2605,13 +3495,13 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/files/$ID" + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "$SERVER_URL/api/files/$ID/hasBeenDeleted" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/files/24" + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X GET "https://demo.dataverse.org/api/files/24/hasBeenDeleted" A curl example using a ``PERSISTENT_ID`` @@ -2621,18 +3511,18 @@ A curl example using a ``PERSISTENT_ID`` export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/files/:persistentId?persistentId=$PERSISTENT_ID" + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "$SERVER_URL/api/files/:persistentId/hasBeenDeleted?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/files/:persistentId?persistentId=doi:10.5072/FK2/AAA000" + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X GET "https://demo.dataverse.org/api/files/:persistentId/hasBeenDeleted?persistentId=doi:10.5072/FK2/AAA000" -Getting File Metadata -~~~~~~~~~~~~~~~~~~~~~ +Updating File Metadata +~~~~~~~~~~~~~~~~~~~~~~ -Provides a json representation of the file metadata for an existing file where ``ID`` is the database id of the file to get metadata from or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. +Updates the file metadata for an existing file where ``ID`` is the database id of the file to update or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. Requires a ``jsonString`` expressing the new metadata. No metadata from the previous version of this file will be persisted, so if you want to update a specific field first get the json with the above command and alter the fields you want. A curl example using an ``ID`` @@ -2642,13 +3532,17 @@ A curl example using an ``ID`` export SERVER_URL=https://demo.dataverse.org export ID=24 - curl "$SERVER_URL/api/files/$ID/metadata" + curl -H "X-Dataverse-key:$API_TOKEN" -X POST \ + -F 'jsonData={"description":"My description bbb.","provFreeform":"Test prov freeform","categories":["Data"],"dataFileTags":["Survey"],"restrict":false}' \ + "$SERVER_URL/api/files/$ID/metadata" The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl "https://demo.dataverse.org/api/files/24/metadata" + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST \ + -F 'jsonData={"description":"My description bbb.","provFreeform":"Test prov freeform","categories":["Data"],"dataFileTags":["Survey"],"restrict":false}' \ + "https://demo.dataverse.org/api/files/24/metadata" A curl example using a ``PERSISTENT_ID`` @@ -2658,15 +3552,39 @@ A curl example using a ``PERSISTENT_ID`` export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 - curl "$SERVER_URL/api/files/:persistentId/metadata?persistentId=$PERSISTENT_ID" + curl -H "X-Dataverse-key:$API_TOKEN" -X POST \ + -F 'jsonData={"description":"My description bbb.","provFreeform":"Test prov freeform","categories":["Data"],"dataFileTags":["Survey"],"restrict":false}' \ + "$SERVER_URL/api/files/:persistentId/metadata?persistentId=$PERSISTENT_ID" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl "https://demo.dataverse.org/api/files/:persistentId/metadata?persistentId=doi:10.5072/FK2/AAA000" + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST \ + -F 'jsonData={"description":"My description bbb.","provFreeform":"Test prov freeform","categories":["Data"],"dataFileTags":["Survey"],"restrict":false}' \ + "https://demo.dataverse.org/api/files/:persistentId/metadata?persistentId=doi:10.5072/FK2/AAA000" -The current draft can also be viewed if you have permissions and pass your API token +Note: To update the 'tabularTags' property of file metadata, use the 'dataFileTags' key when making API requests. This property is used to update the 'tabularTags' of the file metadata. + +Also note that dataFileTags are not versioned and changes to these will update the published version of the file. + +.. _EditingVariableMetadata: + +Updating File Metadata Categories +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Updates the categories for an existing file where ``ID`` is the database id of the file to update or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. Requires a ``jsonString`` expressing the category names. + +Although updating categories can also be done with the previous endpoint, this has been created to be more practical when it is only necessary to update categories and not other metadata fields. + +The JSON representation of file categories (``categories.json``) looks like this:: + + { + "categories": [ + "Data", + "Custom" + ] + } A curl example using an ``ID`` @@ -2675,14 +3593,19 @@ A curl example using an ``ID`` export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export ID=24 + export FILE_PATH=categories.json - curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/$ID/metadata/draft" + curl -H "X-Dataverse-key:$API_TOKEN" -X POST \ + "$SERVER_URL/api/files/$ID/metadata/categories" \ + -H "Content-type:application/json" --upload-file $FILE_PATH The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/24/metadata/draft" + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST \ + "http://demo.dataverse.org/api/files/24/metadata/categories" \ + -H "Content-type:application/json" --upload-file categories.json A curl example using a ``PERSISTENT_ID`` @@ -2691,22 +3614,35 @@ A curl example using a ``PERSISTENT_ID`` export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + export FILE_PATH=categories.json - curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/:persistentId/metadata/draft?persistentId=$PERSISTENT_ID" + curl -H "X-Dataverse-key:$API_TOKEN" -X POST \ + "$SERVER_URL/api/files/:persistentId/metadata/categories?persistentId=$PERSISTENT_ID" \ + -H "Content-type:application/json" --upload-file $FILE_PATH The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/:persistentId/metadata/draft?persistentId=doi:10.5072/FK2/AAA000" + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST \ + "https://demo.dataverse.org/api/files/:persistentId/metadata/categories?persistentId=doi:10.5072/FK2/AAA000" \ + -H "Content-type:application/json" --upload-file categories.json -Note: The ``id`` returned in the json response is the id of the file metadata version. +Note that if the specified categories do not exist, they will be created. +Updating File Tabular Tags +~~~~~~~~~~~~~~~~~~~~~~~~~~ -Updating File Metadata -~~~~~~~~~~~~~~~~~~~~~~ +Updates the tabular tags for an existing tabular file where ``ID`` is the database id of the file to update or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. Requires a ``jsonString`` expressing the tabular tag names. -Updates the file metadata for an existing file where ``ID`` is the database id of the file to update or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. Requires a ``jsonString`` expressing the new metadata. No metadata from the previous version of this file will be persisted, so if you want to update a specific field first get the json with the above command and alter the fields you want. +The JSON representation of tabular tags (``tags.json``) looks like this:: + + { + "tabularTags": [ + "Survey", + "Genomics" + ] + } A curl example using an ``ID`` @@ -2715,18 +3651,19 @@ A curl example using an ``ID`` export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export ID=24 + export FILE_PATH=tags.json curl -H "X-Dataverse-key:$API_TOKEN" -X POST \ - -F 'jsonData={"description":"My description bbb.","provFreeform":"Test prov freeform","categories":["Data"],"restrict":false}' \ - "$SERVER_URL/api/files/$ID/metadata" + "$SERVER_URL/api/files/$ID/metadata/tabularTags" \ + -H "Content-type:application/json" --upload-file $FILE_PATH The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST \ - -F 'jsonData={"description":"My description bbb.","provFreeform":"Test prov freeform","categories":["Data"],"restrict":false}' \ - "http://demo.dataverse.org/api/files/24/metadata" + "http://demo.dataverse.org/api/files/24/metadata/tabularTags" \ + -H "Content-type:application/json" --upload-file tags.json A curl example using a ``PERSISTENT_ID`` @@ -2735,22 +3672,29 @@ A curl example using a ``PERSISTENT_ID`` export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + export FILE_PATH=tags.json curl -H "X-Dataverse-key:$API_TOKEN" -X POST \ - -F 'jsonData={"description":"My description bbb.","provFreeform":"Test prov freeform","categories":["Data"],"restrict":false}' \ - "$SERVER_URL/api/files/:persistentId/metadata?persistentId=$PERSISTENT_ID" + "$SERVER_URL/api/files/:persistentId/metadata/tabularTags?persistentId=$PERSISTENT_ID" \ + -H "Content-type:application/json" --upload-file $FILE_PATH The fully expanded example above (without environment variables) looks like this: .. code-block:: bash curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST \ - -F 'jsonData={"description":"My description bbb.","provFreeform":"Test prov freeform","categories":["Data"],"restrict":false}' \ - "https://demo.dataverse.org/api/files/:persistentId/metadata?persistentId=doi:10.5072/FK2/AAA000" + "https://demo.dataverse.org/api/files/:persistentId/metadata/tabularTags?persistentId=doi:10.5072/FK2/AAA000" \ + -H "Content-type:application/json" --upload-file tags.json -Also note that dataFileTags are not versioned and changes to these will update the published version of the file. +Note that the specified tabular tags must be valid. The supported tags are: -.. _EditingVariableMetadata: +* ``Survey`` +* ``Time Series`` +* ``Panel`` +* ``Event`` +* ``Genomics`` +* ``Network`` +* ``Geospatial`` Editing Variable Level Metadata ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2776,6 +3720,55 @@ The fully expanded example above (without environment variables) looks like this You can download :download:`dct.xml <../../../../src/test/resources/xml/dct.xml>` from the example above to see what the XML looks like. +Get File Citation as JSON +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API is for getting the file citation as it appears on the file landing page. It is formatted in HTML and encoded in JSON. + +To specify the version, you can use ``:latest-published`` or ``:draft`` or ``1.0`` or any other style listed under :ref:`dataset-version-specifiers`. + +When the dataset version is published, authentication is not required: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export FILE_ID=42 + export DATASET_VERSION=:latest-published + + curl "$SERVER_URL/api/files/$FILE_ID/versions/$DATASET_VERSION/citation" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/files/42/versions/:latest-published/citation" + +When the dataset version is a draft or deaccessioned, authentication is required. + +By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "unauthorized" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below. 
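+ +For a draft version that has not been deaccessioned, passing the API token is sufficient (a minimal sketch using the same endpoint as above): + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export FILE_ID=42 + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/$FILE_ID/versions/:draft/citation"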
+ +If you want to include deaccessioned dataset versions, you must set ``includeDeaccessioned`` query parameter to ``true``. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export FILE_ID=42 + export DATASET_VERSION=:draft + export INCLUDE_DEACCESSIONED=true + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/files/$FILE_ID/versions/$DATASET_VERSION/citation?includeDeaccessioned=$INCLUDE_DEACCESSIONED" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/files/42/versions/:draft/citation?includeDeaccessioned=true" + +If your file has a persistent identifier (PID, such as a DOI), you can pass it using the technique described under :ref:`get-json-rep-of-file`. + +This API is not for downloading various citation formats such as EndNote XML, RIS, or BibTeX. This functionality has been requested in https://github.com/IQSS/dataverse/issues/3140 and https://github.com/IQSS/dataverse/issues/9994. + Provenance ~~~~~~~~~~ @@ -3318,6 +4311,8 @@ Show Support Of Incomplete Metadata Deposition Learn if an instance has been configured to allow deposition of incomplete datasets via the API. See also :ref:`create-dataset-command` and :ref:`dataverse.api.allow-incomplete-metadata` +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. + .. code-block:: bash export SERVER_URL=https://demo.dataverse.org @@ -3330,6 +4325,45 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/info/settings/incompleteMetadataViaApi" +Get Zip File Download Limit +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Get the configured zip file download limit. The response contains the long value of the limit in bytes. + +This limit comes from the database setting :ref:`:ZipDownloadLimit` if set, or the default value if the database setting is not set, which is 104857600 (100MB). + +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/info/zipDownloadLimit" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/info/zipDownloadLimit" + +Get Maximum Embargo Duration In Months +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Get the maximum embargo duration in months, if available, configured through the database setting :ref:`:MaxEmbargoDurationInMonths` from the Configuration section of the Installation Guide. + +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/info/settings/:MaxEmbargoDurationInMonths" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/info/settings/:MaxEmbargoDurationInMonths" .. 
_metadata-blocks-api: @@ -3826,6 +4860,56 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/pids/:persistentId/delete?persistentId=doi:10.70122/FK2/9BXT5O" +.. _pids-providers-api: + +Get Information about Configured PID Providers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Dataverse can be configured with one or more PID Providers that it uses to create new PIDs and manage existing ones. +This API call returns a JSON object listing the configured providers and details about the protocol/authority/separator/shoulder they manage, +along with information about how new dataset and datafile PIDs are generated. See the :ref:`pids-configuration` section for more information. + +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/pids/providers" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/pids/providers" + +Get the id of the PID Provider Managing a Given PID +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Dataverse can be configured with one or more PID Providers that it uses to create new PIDs and manage existing ones. +This API call returns the string id of the PID Provider that manages a given PID. See the :ref:`pids-configuration` section for more information. + +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PID=doi:10.70122/FK2/9BXT5O + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/pids/providers/$PID" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/pids/providers/doi:10.70122/FK2/9BXT5O" + +If the PID is not managed by Dataverse, this call will report whether the PID is recognized as a valid PID for a given protocol (doi, hdl, or perma) or will return a 400/Bad Request response if it is not. + .. _admin: @@ -4677,7 +5761,6 @@ A curl example using allowing access to a dataset's metadata Please see :ref:`dataverse.api.signature-secret` for the configuration option to add a shared secret, enabling extra security. - .. _send-feedback: Send Feedback To Contact(s) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -4704,6 +5787,33 @@ A curl example using an ``ID`` Note that this call could be useful in coordinating with dataset authors (assuming they are also contacts) as an alternative/addition to the functionality provided by :ref:`return-a-dataset`. +.. 
_thumbnail_reset: + +Reset Thumbnail Failure Flags +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If Dataverse attempts to create a thumbnail image for an image or PDF file and the attempt fails, Dataverse will set a flag for the file to avoid repeated attempts to generate the thumbnail. +For cases where the problem may have been temporary (or fixed in a later Dataverse release), the API calls below can be used to reset this flag for all files or for a given file. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export FILE_ID=1234 + + curl -X DELETE $SERVER_URL/api/admin/clearThumbnailFailureFlag + + curl -X DELETE $SERVER_URL/api/admin/clearThumbnailFailureFlag/$FILE_ID + +.. _download-file-from-tmp: + +Download File from /tmp +~~~~~~~~~~~~~~~~~~~~~~~ + +As a superuser:: + + GET /api/admin/downloadTmpFile?fullyQualifiedPathToFile=/tmp/foo.txt + +Note that this API is probably only useful for testing. MyData ------ diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index b941064f173..e8d0a0b3ea7 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -25,7 +25,7 @@ Parameters Name Type Description =============== ======= =========== q string The search term or terms. Using "title:data" will search only the "title" field. "*" can be used as a wildcard either alone or adjacent to a term (i.e. "bird*"). For example, https://demo.dataverse.org/api/search?q=title:data . For a list of fields to search, please see https://github.com/IQSS/dataverse/issues/2558 (for now). -type string Can be either "Dataverse", "dataset", or "file". Multiple "type" parameters can be used to include multiple types (i.e. ``type=dataset&type=file``). If omitted, all types will be returned. For example, https://demo.dataverse.org/api/search?q=*&type=dataset +type string Can be either "dataverse", "dataset", or "file". Multiple "type" parameters can be used to include multiple types (i.e. ``type=dataset&type=file``). If omitted, all types will be returned. For example, https://demo.dataverse.org/api/search?q=*&type=dataset subtree string The identifier of the Dataverse collection to which the search should be narrowed. The subtree of this Dataverse collection and all its children will be searched. Multiple "subtree" parameters can be used to include multiple Dataverse collections. For example, https://demo.dataverse.org/api/search?q=data&subtree=birds&subtree=cats . sort string The sort field. Supported values include "name" and "date". See example under "order". order string The order in which to sort. Can either be "asc" or "desc". For example, https://demo.dataverse.org/api/search?q=data&sort=name&order=asc diff --git a/doc/sphinx-guides/source/api/sword.rst b/doc/sphinx-guides/source/api/sword.rst index 11b43e98774..51391784bde 100755 --- a/doc/sphinx-guides/source/api/sword.rst +++ b/doc/sphinx-guides/source/api/sword.rst @@ -9,19 +9,19 @@ SWORD_ stands for "Simple Web-service Offering Repository Deposit" and is a "pro About ----- -Introduced in Dataverse Network (DVN) `3.6 `_, the SWORD API was formerly known as the "Data Deposit API" and ``data-deposit/v1`` appeared in the URLs. For backwards compatibility these URLs continue to work (with deprecation warnings). Due to architectural changes and security improvements (especially the introduction of API tokens) in Dataverse Software 4.0, a few backward incompatible changes were necessarily introduced and for this reason the version has been increased to ``v1.1``. 
For details, see :ref:`incompatible`. +Introduced in Dataverse Network (DVN) `3.6 `_, the SWORD API was formerly known as the "Data Deposit API" and ``data-deposit/v1`` appeared in the URLs. For backwards compatibility these URLs continue to work (with deprecation warnings). Due to architectural changes and security improvements (especially the introduction of API tokens) in Dataverse Software 4.0, a few backward incompatible changes were necessarily introduced and for this reason the version has been increased to ``v1.1``. For details, see :ref:`incompatible`. -The Dataverse Software implements most of SWORDv2_, which is specified at http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html . Please reference the `SWORDv2 specification`_ for expected HTTP status codes (i.e. 201, 204, 404, etc.), headers (i.e. "Location"), etc. +The Dataverse Software implements most of SWORDv2_, which is specified at https://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html . Please reference the `SWORDv2 specification`_ for expected HTTP status codes (i.e. 201, 204, 404, etc.), headers (i.e. "Location"), etc. As a profile of AtomPub, XML is used throughout SWORD. As of Dataverse Software 4.0 datasets can also be created via JSON using the "native" API. SWORD is limited to the dozen or so fields listed below in the crosswalk, but the native API allows you to populate all metadata fields available in a Dataverse installation. -.. _SWORD: http://en.wikipedia.org/wiki/SWORD_%28protocol%29 +.. _SWORD: https://en.wikipedia.org/wiki/SWORD_%28protocol%29 .. _SWORDv2: http://swordapp.org/sword-v2/sword-v2-specifications/ .. _RFC 5023: https://tools.ietf.org/html/rfc5023 -.. _SWORDv2 specification: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html +.. _SWORDv2 specification: https://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html .. _sword-auth: @@ -86,7 +86,7 @@ New features as of v1.1 - "Contact E-mail" is automatically populated from dataset owner's email. -- "Subject" uses our controlled vocabulary list of subjects. This list is in the Citation Metadata of our User Guide > `Metadata References `_. Otherwise, if a term does not match our controlled vocabulary list, it will put any subject terms in "Keyword". If Subject is empty it is automatically populated with "N/A". +- "Subject" uses our controlled vocabulary list of subjects. This list is in the Citation Metadata of our User Guide > `Metadata References `_. Otherwise, if a term does not match our controlled vocabulary list, it will put any subject terms in "Keyword". If Subject is empty it is automatically populated with "N/A". - Zero-length files are now allowed (but not necessarily encouraged). @@ -127,7 +127,7 @@ Dublin Core Terms (DC Terms) Qualified Mapping - Dataverse Project DB Element Cr +-----------------------------+----------------------------------------------+--------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ |dcterms:creator | authorName (LastName, FirstName) | Y | Author(s) for the Dataset. | +-----------------------------+----------------------------------------------+--------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ -|dcterms:subject | subject (Controlled Vocabulary) OR keyword | Y | Controlled Vocabulary list is in our User Guide > `Metadata References `_. 
| +|dcterms:subject | subject (Controlled Vocabulary) OR keyword | Y | Controlled Vocabulary list is in our User Guide > `Metadata References `_. | +-----------------------------+----------------------------------------------+--------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ |dcterms:description | dsDescriptionValue | Y | Describing the purpose, scope or nature of the Dataset. Can also use dcterms:abstract. | +-----------------------------+----------------------------------------------+--------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index 7ff17eb45ed..98d10526517 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -38,11 +38,12 @@ # ones. extensions = [ 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', 'sphinx.ext.graphviz', 'sphinxcontrib.icon', + 'myst_parser', + 'sphinx_tabs.tabs', ] # Add any paths that contain templates here, relative to this directory. @@ -66,9 +67,9 @@ # built documents. # # The short X.Y version. -version = '6.0' +version = '6.1' # The full version, including alpha/beta/rc tags. -release = '6.0' +release = '6.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -430,9 +431,6 @@ # If false, no index is generated. #epub_use_index = True - -# Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'http://docs.python.org/': None} # Suppress "WARNING: unknown mimetype for ..." https://github.com/IQSS/dataverse/issues/3391 suppress_warnings = ['epub.unknown_project_files'] rst_prolog = """ diff --git a/doc/sphinx-guides/source/container/app-image.rst b/doc/sphinx-guides/source/container/app-image.rst index 29f6d6ac1d4..caf4aadbf7e 100644 --- a/doc/sphinx-guides/source/container/app-image.rst +++ b/doc/sphinx-guides/source/container/app-image.rst @@ -22,20 +22,20 @@ IQSS will not offer you support how to deploy or run it, please reach out to the You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) efforts. - +.. _supported-image-tags-app: Supported Image Tags ++++++++++++++++++++ This image is sourced from the main upstream code `repository of the Dataverse software `_. -Development and maintenance of the `image's code `_ happens there -(again, by the community). - -.. note:: - Please note that this image is not (yet) available from Docker Hub. You need to build local to use - (see below). Follow https://github.com/IQSS/dataverse/issues/9444 for new developments. - - +Development and maintenance of the `image's code `_ +happens there (again, by the community). Community-supported image tags are based on the two most important +upstream branches: + +- The ``unstable`` tag corresponds to the ``develop`` branch, where pull requests are merged. + (`Dockerfile `__) +- The ``alpha`` tag corresponds to the ``master`` branch, where releases are cut from. 
+ (`Dockerfile `__) Image Contents ++++++++++++++ diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 1a47a8fc413..c41250d48c5 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -217,7 +217,14 @@ provides. These are mostly based on environment variables (very common with cont - ``0`` - Bool, ``0|1`` - Enable the dynamic "hot" reloads of files when changed in a deployment. Useful for development, - when new artifacts are copied into the running domain. + when new artifacts are copied into the running domain. Also, export Dataverse specific environment variables + ``DATAVERSE_JSF_PROJECT_STAGE=Development`` and ``DATAVERSE_JSF_REFRESH_PERIOD=0`` to enable dynamic JSF page + reloads. + * - ``SKIP_DEPLOY`` + - ``0`` + - Bool, ``0|1`` or ``false|true`` + - When active, do not deploy applications from ``DEPLOY_DIR`` (see below), just start the application server. + Will still execute any provided init scripts and only skip deployments within the default init scripts. * - ``DATAVERSE_HTTP_TIMEOUT`` - ``900`` - Seconds @@ -272,7 +279,8 @@ building upon it. You can also use these for references in scripts, etc. (Might be reused for Dataverse one day) * - ``DEPLOY_DIR`` - ``${HOME_DIR}/deployments`` - - Any EAR or WAR file, exploded WAR directory etc are autodeployed on start + - Any EAR or WAR file, exploded WAR directory etc are autodeployed on start. + See also ``SKIP_DEPLOY`` above. * - ``DOMAIN_DIR`` - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}`` - Path to root of the Payara domain applications will be deployed into. Usually ``${DOMAIN_NAME}`` will be ``domain1``. @@ -299,9 +307,9 @@ named Docker volume in these places to avoid data loss, gain performance and/or - Description * - ``STORAGE_DIR`` - ``/dv`` - - This place is writeable by the Payara user, making it usable as a place to store research data, customizations - or other. Images inheriting the base image should create distinct folders here, backed by different - mounted volumes. + - This place is writeable by the Payara user, making it usable as a place to store research data, customizations or other. + Images inheriting the base image should create distinct folders here, backed by different mounted volumes. + Enforce correct filesystem permissions on the mounted volume using ``fix-fs-perms.sh`` from :doc:`configbaker-image` or similar scripts. * - ``SECRETS_DIR`` - ``/secrets`` - Mount secrets or other here, being picked up automatically by @@ -353,6 +361,8 @@ Other Hints By default, ``domain1`` is enabled to use the ``G1GC`` garbage collector. +To access the Payara Admin Console or use the ``asadmin`` command, use username ``admin`` and password ``admin``. + For running a Java application within a Linux based container, the support for CGroups is essential. It has been included and activated by default since Java 8u192, Java 11 LTS and later. 
If you are interested in more details, you can read about those in a few places like https://developers.redhat.com/articles/2022/04/19/java-17-whats-new-openjdks-container-awareness, diff --git a/doc/sphinx-guides/source/container/configbaker-image.rst b/doc/sphinx-guides/source/container/configbaker-image.rst index 7218e2d8d14..d098bd46436 100644 --- a/doc/sphinx-guides/source/container/configbaker-image.rst +++ b/doc/sphinx-guides/source/container/configbaker-image.rst @@ -86,7 +86,7 @@ Maven modules packaging target with activated "container" profile from the proje If you specifically want to build a config baker image *only*, try -``mvn -Pct package -Ddocker.filter=dev_bootstrap`` +``mvn -Pct docker:build -Ddocker.filter=dev_bootstrap`` The build of config baker involves copying Solr configset files. The Solr version used is inherited from Maven, acting as the single source of truth. Also, the tag of the image should correspond the application image, as diff --git a/doc/sphinx-guides/source/container/dev-usage.rst b/doc/sphinx-guides/source/container/dev-usage.rst index 04c7eba7913..be4eda5da44 100644 --- a/doc/sphinx-guides/source/container/dev-usage.rst +++ b/doc/sphinx-guides/source/container/dev-usage.rst @@ -141,26 +141,205 @@ Alternatives: Options are the same. -Re-Deploying ------------- +Redeploying +----------- + +The safest and most reliable way to redeploy code is to stop the running containers (with Ctrl-c if you started them in the foreground) and then build and run them again with ``mvn -Pct clean package docker:run``. +Safe, but also slowing down the development cycle a lot. + +Triggering redeployment of changes using an IDE can greatly improve your feedback loop when changing code. +You have at least two options: + +#. Use builtin features of IDEs or `IDE plugins from Payara `_. +#. Use a paid product like `JRebel `_. + +The main differences between the first and the second options are support for hot deploys of non-class files and limitations in what the JVM HotswapAgent can do for you. +Find more details in a `blog article by JRebel `_. + +.. _ide-trigger-code-deploy: + +IDE Triggered Code Re-Deployments +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To make use of builtin features or Payara IDE Tools (option 1), please follow steps below. +Note that using this method, you may redeploy a complete WAR or single methods. +Redeploying WARs supports swapping and adding classes and non-code materials, but is slower (still faster than rebuilding containers). +Hotswapping methods requires using JDWP (Debug Mode), but does not allow switching non-code material or adding classes. + +#. | Download the version of Payara shown in :ref:`install-payara-dev` and unzip it to a reasonable location such as ``/usr/local/payara6``. + | - Note that Payara can also be downloaded from `Maven Central `_. + | - Note that another way to check the expected version of Payara is to run this command: + | ``mvn help:evaluate -Dexpression=payara.version -q -DforceStdout`` + +#. Install Payara Tools plugin in your IDE: + + .. tabs:: + .. group-tab:: Netbeans + + This step is not necessary for Netbeans. The feature is builtin. + + .. group-tab:: IntelliJ + + **Requires IntelliJ Ultimate!** + (Note that `free educational licenses `_ are available) + + .. image:: img/intellij-payara-plugin-install.png + +#. Configure a connection to Payara: + + .. tabs:: + .. group-tab:: Netbeans + + Launch Netbeans and click "Tools" and then "Servers". 
Click "Add Server" and select "Payara Server" and set the installation location to ``/usr/local/payara6`` (or wherever you unzipped Payara). Choose "Remote Domain". Use the settings in the screenshot below. Most of the defaults are fine. + + Under "Common", the username and password should be "admin". Make sure "Enable Hot Deploy" is checked. + + .. image:: img/netbeans-servers-common.png + + Under "Java", change the debug port to 9009. + + .. image:: img/netbeans-servers-java.png + + Open the project properties (under "File"), navigate to "Compile" and make sure "Compile on Save" is checked. + + .. image:: img/netbeans-compile.png + + Under "Run", under "Server", select "Payara Server". Make sure "Deploy on Save" is checked. + + .. image:: img/netbeans-run.png + + .. group-tab:: IntelliJ + Create a new running configuration with a "Remote Payara". + (Open dialog by clicking "Run", then "Edit Configurations") + + .. image:: img/intellij-payara-add-new-config.png + + Click on "Configure" next to "Application Server". + Add an application server and select unzipped local directory. + + .. image:: img/intellij-payara-config-add-server.png + + Add admin password "admin" and add "building artifact" before launch. + Make sure to select the WAR, *not* exploded! + + .. image:: img/intellij-payara-config-server.png + + Go to "Deployment" tab and add the Dataverse WAR, *not* exploded!. + + .. image:: img/intellij-payara-config-deployment.png + + Go to "Startup/Connection" tab, select "Debug" and change port to ``9009``. + + .. image:: img/intellij-payara-config-startup.png + + You might want to tweak the hot deploy behavior in the "Server" tab now. + "Update action" can be found in the run window (see below). + "Frame deactivation" means switching from IntelliJ window to something else, e.g. your browser. + *Note: static resources like properties, XHTML etc will only update when redeploying!* + + .. image:: img/intellij-payara-config-server-behaviour.png + +#. Start all the containers, but take care to skip application deployment. + + .. tabs:: + .. group-tab:: Maven + ``mvn -Pct docker:run -Dapp.skipDeploy`` + + Run above command in your terminal to start containers in foreground and skip deployment. + See cheat sheet above for more options. + Note that this command either assumes you built the :doc:`app-image` first or will download it from Docker Hub. + .. group-tab:: Compose + ``SKIP_DEPLOY=1 docker compose -f docker-compose-dev.yml up`` + + Run above command in your terminal to start containers in foreground and skip deployment. + See cheat sheet above for more options. + Note that this command either assumes you built the :doc:`app-image` first or will download it from Docker Hub. + .. group-tab:: IntelliJ + You can create a service configuration to automatically start services for you. + + **IMPORTANT**: This requires installation of the `Docker plugin `_. + + **NOTE**: You might need to change the Docker Compose executable in your IDE settings to ``docker`` if you have no ``docker-compose`` bin (*File > Settings > Build > Docker > Tools*). + + .. image:: img/intellij-compose-add-new-config.png + + Give your configuration a meaningful name, select the compose file to use (in this case the default one), add the environment variable ``SKIP_DEPLOY=1``, and optionally select the services to start. + You might also want to change other options like attaching to containers to view the logs within the "Services" tab. + + .. 
image:: img/intellij-compose-setup.png + + Now run the configuration to prepare for deployment and watch it unfold in the "Services" tab. + + .. image:: img/intellij-compose-run.png + .. image:: img/intellij-compose-services.png + + Note: the Admin Console can be reached at http://localhost:4848 or https://localhost:4949 + +#. To deploy the application to the running server, use the configured tools to deploy. + Using the "Run" configuration only deploys and enables redeploys, while running "Debug" enables hot swapping of classes via JDWP. + + .. tabs:: + .. group-tab:: Netbeans + + Click "Debug" then "Debug Project". After some time, Dataverse will be deployed. + + Try making a code change, perhaps to ``Info.java``. + + Click "Debug" and then "Apply Code Changes". If the change was correctly applied, you should see output similar to this: + + .. code-block:: + + Classes to reload: + edu.harvard.iq.dataverse.api.Info + + Code updated + + Check to make sure the change is live by visiting, for example, http://localhost:8080/api/info/version + + See below for a `video `_ demonstrating the steps above but please note that the ports used have changed and now that we have the concept of "skip deploy" the undeployment step shown is no longer necessary. + + .. raw:: html + + + + .. group-tab:: IntelliJ + Choose "Run" or "Debug" in the toolbar. + + .. image:: img/intellij-payara-run-toolbar.png + + Watch the WAR build and the deployment unfold. + Note the "Update" action button (see config to change its behavior). + + .. image:: img/intellij-payara-run-output.png + + Manually hotswap classes in "Debug" mode via "Run" > "Debugging Actions" > "Reload Changed Classes". + + .. image:: img/intellij-payara-run-menu-reload.png + +Note: in the background, the bootstrap job will wait for Dataverse to be deployed and responsive. +When your IDE automatically opens the URL a newly deployed, not bootstrapped Dataverse application, it might take some more time and page refreshes until the job finishes. + +IDE Triggered Non-Code Re-Deployments +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Currently, the only safe and tested way to re-deploy the Dataverse application after you applied code changes is -by recreating the container(s). In the future, more options may be added here. +Either redeploy the WAR (see above), use JRebel or look into copying files into the exploded WAR within the running container. +The steps below describe options to enable the later in different IDEs. -If you started your containers in foreground, just stop them and follow the steps for building and running again. -The same goes for using Maven to start the containers in the background. +.. tabs:: + .. group-tab:: IntelliJ -In case of using Docker Compose and starting the containers in the background, you can use a workaround to only -restart the application container: + This imitates the Netbeans builtin function to copy changes to files under ``src/main/webapp`` into a destination folder. + It is different in the way that it will copy the files into the running container deployment without using a bind mount. -.. code-block:: + 1. Install the `File Watchers plugin `_ + 2. Import the :download:`watchers.xml <../../../../docker/util/intellij/watchers.xml>` file at *File > Settings > Tools > File Watchers* + 3. Once you have the deployment running (see above), editing files under ``src/main/webapp`` will be copied into the container after saving the edited file. + Note: by default, IDE auto-saves will not trigger the copy. + 4. 
Changes are visible once you reload the browser window. - # First rebuild the container (will complain about an image still in use, this is fine.) - mvn -Pct package - # Then re-create the container (will automatically restart the container for you) - docker compose -f docker-compose-dev.yml create dev_dataverse + **IMPORTANT**: This tool assumes you are using the :ref:`ide-trigger-code-deploy` method to run Dataverse. -Using ``docker container inspect dev_dataverse | grep Image`` you can verify the changed checksums. + **IMPORTANT**: This tool uses a Bash shell script and is thus limited to Mac and Linux OS. Using a Debugger ---------------- diff --git a/doc/sphinx-guides/source/container/img/intellij-compose-add-new-config.png b/doc/sphinx-guides/source/container/img/intellij-compose-add-new-config.png new file mode 100644 index 00000000000..cec9bb357fe Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-compose-add-new-config.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-compose-run.png b/doc/sphinx-guides/source/container/img/intellij-compose-run.png new file mode 100644 index 00000000000..e01744134f9 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-compose-run.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-compose-services.png b/doc/sphinx-guides/source/container/img/intellij-compose-services.png new file mode 100644 index 00000000000..1c500c54201 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-compose-services.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-compose-setup.png b/doc/sphinx-guides/source/container/img/intellij-compose-setup.png new file mode 100644 index 00000000000..42c2accf2b4 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-compose-setup.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-payara-add-new-config.png b/doc/sphinx-guides/source/container/img/intellij-payara-add-new-config.png new file mode 100644 index 00000000000..d1c7a8f2777 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-payara-add-new-config.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-payara-config-add-server.png b/doc/sphinx-guides/source/container/img/intellij-payara-config-add-server.png new file mode 100644 index 00000000000..54ffbd1b713 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-payara-config-add-server.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-payara-config-deployment.png b/doc/sphinx-guides/source/container/img/intellij-payara-config-deployment.png new file mode 100644 index 00000000000..52adee056b5 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-payara-config-deployment.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-payara-config-server-behaviour.png b/doc/sphinx-guides/source/container/img/intellij-payara-config-server-behaviour.png new file mode 100644 index 00000000000..5d23672e614 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-payara-config-server-behaviour.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-payara-config-server.png b/doc/sphinx-guides/source/container/img/intellij-payara-config-server.png new file mode 100644 index 00000000000..614bda6f6d7 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-payara-config-server.png differ 
diff --git a/doc/sphinx-guides/source/container/img/intellij-payara-config-startup.png b/doc/sphinx-guides/source/container/img/intellij-payara-config-startup.png new file mode 100644 index 00000000000..35b87148859 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-payara-config-startup.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-payara-plugin-install.png b/doc/sphinx-guides/source/container/img/intellij-payara-plugin-install.png new file mode 100644 index 00000000000..7c6896574de Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-payara-plugin-install.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-payara-run-menu-reload.png b/doc/sphinx-guides/source/container/img/intellij-payara-run-menu-reload.png new file mode 100644 index 00000000000..b1fd8bea260 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-payara-run-menu-reload.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-payara-run-output.png b/doc/sphinx-guides/source/container/img/intellij-payara-run-output.png new file mode 100644 index 00000000000..aa139485a9d Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-payara-run-output.png differ diff --git a/doc/sphinx-guides/source/container/img/intellij-payara-run-toolbar.png b/doc/sphinx-guides/source/container/img/intellij-payara-run-toolbar.png new file mode 100644 index 00000000000..2aecb27c5f3 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/intellij-payara-run-toolbar.png differ diff --git a/doc/sphinx-guides/source/container/img/netbeans-compile.png b/doc/sphinx-guides/source/container/img/netbeans-compile.png new file mode 100644 index 00000000000..e429695ccb0 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/netbeans-compile.png differ diff --git a/doc/sphinx-guides/source/container/img/netbeans-run.png b/doc/sphinx-guides/source/container/img/netbeans-run.png new file mode 100644 index 00000000000..00f8af23cc5 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/netbeans-run.png differ diff --git a/doc/sphinx-guides/source/container/img/netbeans-servers-common.png b/doc/sphinx-guides/source/container/img/netbeans-servers-common.png new file mode 100644 index 00000000000..a9ded5dbec3 Binary files /dev/null and b/doc/sphinx-guides/source/container/img/netbeans-servers-common.png differ diff --git a/doc/sphinx-guides/source/container/img/netbeans-servers-java.png b/doc/sphinx-guides/source/container/img/netbeans-servers-java.png new file mode 100644 index 00000000000..2593cacc5ae Binary files /dev/null and b/doc/sphinx-guides/source/container/img/netbeans-servers-java.png differ diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index 4bbc87a4845..abf871dd340 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -1,28 +1,12 @@ Container Guide =============== -Running the Dataverse software in containers is quite different than in a :doc:`standard installation <../installation/prep>`. - -Both approaches have pros and cons. These days, containers are very often used for development and testing, -but there is an ever rising move toward running applications in the cloud using container technology. 
- -**NOTE:** -**As the Institute for Quantitative Social Sciences (IQSS) at Harvard is running a standard, non-containerized installation, -container support described in this guide is mostly created and maintained by the Dataverse community on a best-effort -basis.** - -This guide is *not* about installation on technology like Docker Swarm, Kubernetes, Rancher or other -solutions to run containers in production. There is the `Dataverse on K8s project `_ for this -purpose, as mentioned in the :doc:`/developers/containers` section of the Developer Guide. - -This guide focuses on describing the container images managed from the main Dataverse repository (again: by the -community, not IQSS), their features and limitations. Instructions on how to build the images yourself and how to -develop and extend them further are provided. - **Contents:** .. toctree:: + intro + running/index dev-usage base-image app-image diff --git a/doc/sphinx-guides/source/container/intro.rst b/doc/sphinx-guides/source/container/intro.rst new file mode 100644 index 00000000000..5099531dcc9 --- /dev/null +++ b/doc/sphinx-guides/source/container/intro.rst @@ -0,0 +1,28 @@ +Introduction +============ + +Dataverse in containers! + +.. contents:: |toctitle| + :local: + +Intended Audience +----------------- + +This guide is intended for anyone who wants to run Dataverse in containers. This is potentially a wide audience, from sysadmins interested in running Dataverse in production in containers (not recommended yet) to contributors working on a bug fix (encouraged!). See :doc:`running/index` for various scenarios and please let us know if your use case is not covered. + +.. _getting-help-containers: + +Getting Help +------------ + +Please ask in #containers at https://chat.dataverse.org + +Alternatively, you can try one or more of the channels under :ref:`support`. + +.. _helping-containers: + +Helping with the Containerization Effort +---------------------------------------- + +In 2023 the Containerization Working Group started meeting regularly. All are welcome to join! We talk in #containers at https://chat.dataverse.org and have a regular video call. For details, please visit https://ct.gdcc.io diff --git a/doc/sphinx-guides/source/container/running/backend-dev.rst b/doc/sphinx-guides/source/container/running/backend-dev.rst new file mode 100644 index 00000000000..8b2dab956ad --- /dev/null +++ b/doc/sphinx-guides/source/container/running/backend-dev.rst @@ -0,0 +1,10 @@ +Backend Development +=================== + +.. contents:: |toctitle| + :local: + +Intro +----- + +See :doc:`../dev-usage`. diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst new file mode 100644 index 00000000000..24027e677a1 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -0,0 +1,217 @@ +Demo or Evaluation +================== + +In the following tutorial we'll walk through spinning up Dataverse in containers for demo or evaluation purposes. + +.. contents:: |toctitle| + :local: + +Quickstart +---------- + +First, let's confirm that we can get Dataverse running on your system. + +- Download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` +- Run ``docker compose up`` in the directory where you put ``compose.yml`` +- Visit http://localhost:8080 and try logging in: + + - username: dataverseAdmin + - password: admin1 + +If you can log in, great! Please continue through the tutorial. 
If you have any trouble, please consult the sections below on troubleshooting and getting help. + +Stopping and Starting the Containers +------------------------------------ + +Let's practice stopping the containers and starting them up again. Your data, stored in a directory called ``data``, will remain intact + +To stop the containers hit ``Ctrl-c`` (hold down the ``Ctrl`` key and then hit the ``c`` key). + +To start the containers, run ``docker compose up``. + +Deleting Data and Starting Over +------------------------------- + +Again, data related to your Dataverse installation such as the database is stored in a directory called ``data`` that gets created in the directory where you ran ``docker compose`` commands. + +You may reach a point during your demo or evaluation that you'd like to start over with a fresh database. Simply make sure the containers are not running and then remove the ``data`` directory. Now, as before, you can run ``docker compose up`` to spin up the containers. + +Setting Up for a Demo +--------------------- + +Now that you are familiar with the basics of running Dataverse in containers, let's move on to a better setup for a demo or evaluation. + +Starting Fresh +++++++++++++++ + +For this exercise, please start fresh by stopping all containers and removing the ``data`` directory. + +Creating and Running a Demo Persona ++++++++++++++++++++++++++++++++++++ + +Previously we used the "dev" persona to bootstrap Dataverse, but for security reasons, we should create a persona more suited to demos and evaluations. + +Edit the ``compose.yml`` file and look for the following section. + +.. code-block:: bash + + bootstrap: + container_name: "bootstrap" + image: gdcc/configbaker:alpha + restart: "no" + command: + - bootstrap.sh + - dev + #- demo + #volumes: + # - ./demo:/scripts/bootstrap/demo + networks: + - dataverse + +Comment out "dev" and uncomment "demo". + +Uncomment the "volumes" section. + +Create a directory called "demo" and copy :download:`init.sh <../../../../../modules/container-configbaker/scripts/bootstrap/demo/init.sh>` into it. You are welcome to edit this demo init script, customizing the final message, for example. + +Note that the init script contains a key for using the admin API once it is blocked. You should change it in the script from "unblockme" to something only you know. + +Now run ``docker compose up``. The "bootstrap" container should exit with the message from the init script and Dataverse should be running on http://localhost:8080 as before during the quickstart exercise. + +One of the main differences between the "dev" persona and our new "demo" persona is that we are now running the setup-all script without the ``--insecure`` flag. This makes our installation more secure, though it does block "admin" APIs that are useful for configuration. + +Smoke Testing +------------- + +At this point, please try the following basic operations within your installation: + +- logging in as dataverseAdmin (password "admin1") +- publishing the "root" collection (dataverse) +- creating a collection +- creating a dataset +- uploading a data file +- publishing the dataset + +If anything isn't working, please see the sections below on troubleshooting, giving feedback, and getting help. + +Further Configuration +--------------------- + +Now that we've verified through a smoke test that basic operations are working, let's configure our installation of Dataverse. 
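+Before changing anything, it can help to confirm what you are starting from. Here is a quick sketch (it assumes the default port mapping and the "demo" persona described above); the second call should be rejected because the "admin" API is blocked:
+
+.. code-block:: bash
+
+   # Check that the application responds and note the running version.
+   curl -s http://localhost:8080/api/info/version
+
+   # The "admin" API should be blocked for the "demo" persona, so expect an error here.
+   curl -s http://localhost:8080/api/admin/settings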
+ + Please refer to the :doc:`/installation/config` section of the Installation Guide for various configuration options. + + Below we'll explain some specifics for configuration in containers. + + JVM Options/MicroProfile Config + +++++++++++++++++++++++++++++++ + + :ref:`jvm-options` can be configured under ``JVM_ARGS`` in the ``compose.yml`` file. Here's an example: + + .. code-block:: bash + + environment: + JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 + + Some JVM options can be configured as environment variables. For example, you can configure the database host like this: + + .. code-block:: bash + + environment: + DATAVERSE_DB_HOST: postgres + + We are in the process of making more JVM options configurable as environment variables. Look for the term "MicroProfile Config" under :doc:`/installation/config` in the Installation Guide to know if you can use them this way. + + Please note that for a few environment variables (the ones that start with ``%ct`` in :download:`microprofile-config.properties <../../../../../src/main/resources/META-INF/microprofile-config.properties>`), you have to prepend ``_CT_`` to make, for example, ``_CT_DATAVERSE_SITEURL``. We are working on a fix for this in https://github.com/IQSS/dataverse/issues/10285. + + There is a final way to configure JVM options that we plan to deprecate once all JVM options have been converted to MicroProfile Config. Look for "magic trick" under "tunables" at :doc:`../app-image` for more information. + + Database Settings + +++++++++++++++++ + + Generally, you should be able to look at the list of :ref:`database-settings` and configure them, but the "demo" persona above secured your installation to the point that you'll need an "unblock key" to access the "admin" API and change database settings. + + In the example below of configuring :ref:`:FooterCopyright` we use the default unblock key of "unblockme" but you should use the key you set above. + + ``curl -X PUT -d ", My Org" "http://localhost:8080/api/admin/settings/:FooterCopyright?unblock-key=unblockme"`` + + Once you make this change, it should be visible in the copyright in the bottom left of every page. + + Next Steps + ---------- + + From here, you are encouraged to continue poking around, configuring, and testing. You will probably spend a lot of time reading the :doc:`/installation/config` section of the Installation Guide. + + Please consider giving feedback using the methods described below. Good luck with your demo! + + About the Containers + -------------------- + + Now that you've gone through the tutorial, you might be interested in the various containers you've spun up and what they do. + + Container List + ++++++++++++++ + + If you run ``docker ps``, you'll see that multiple containers are spun up in a demo or evaluation. Here are the most important ones: + + - dataverse + - postgres + - solr + - smtp + - bootstrap + + Most are self-explanatory, and correspond to components listed under :doc:`/installation/prerequisites` in the (traditional) Installation Guide, but "bootstrap" refers to :doc:`../configbaker-image`. + + Additional containers are used in development (see :doc:`../dev-usage`), but for the purposes of a demo or evaluation, fewer moving (sometimes pointy) parts are included. + + Tags and Versions + +++++++++++++++++ + + The compose file references a tag called "alpha", which corresponds to the latest released version of Dataverse. 
This means that if a release of Dataverse comes out while you are demo'ing or evaluating, the version of Dataverse you are using could change if you do a ``docker pull``. We are aware that there is a desire for tags that correspond to versions to ensure consistency. You are welcome to join `the discussion `_ and otherwise get in touch (see :ref:`helping-containers`). For more on tags, see :ref:`supported-image-tags-app`. + +Once Dataverse is running, you can check which version you have through the normal methods: + +- Check the bottom right in a web browser. +- Check http://localhost:8080/api/info/version via API. + +Troubleshooting +--------------- + +Hardware and Software Requirements +++++++++++++++++++++++++++++++++++ + +- 8 GB RAM (if not much else is running) +- Mac, Linux, or Windows (experimental) +- Docker + +Windows support is experimental but we are very interested in supporting Windows better. Please report bugs (see :ref:`helping-containers`). + +Bootstrapping Did Not Complete +++++++++++++++++++++++++++++++ + +In the compose file, try increasing the timeout in the bootstrap container by adding something like this: + +.. code-block:: bash + + environment: + - TIMEOUT=10m + +Wrapping Up +----------- + +Deleting the Containers and Data +++++++++++++++++++++++++++++++++ + +If you no longer need the containers because your demo or evaluation is finished and you want to reclaim disk space, run ``docker compose down`` in the directory where you put ``compose.yml``. + +You might also want to delete the ``data`` directory, as described above. + +Giving Feedback +--------------- + +Your feedback is extremely valuable to us! To let us know what you think, please see :ref:`helping-containers`. + +Getting Help +------------ + +Please do not be shy about reaching out for help. We very much want you to have a pleasant demo or evaluation experience. For ways to contact us, please see See :ref:`getting-help-containers`. diff --git a/doc/sphinx-guides/source/container/running/frontend-dev.rst b/doc/sphinx-guides/source/container/running/frontend-dev.rst new file mode 100644 index 00000000000..88d40c12053 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/frontend-dev.rst @@ -0,0 +1,10 @@ +Frontend Development +==================== + +.. contents:: |toctitle| + :local: + +Intro +----- + +The frontend (web interface) of Dataverse is being decoupled from the backend. This evolving codebase has its own repo at https://github.com/IQSS/dataverse-frontend which includes docs and scripts for running the backend of Dataverse in Docker. diff --git a/doc/sphinx-guides/source/container/running/github-action.rst b/doc/sphinx-guides/source/container/running/github-action.rst new file mode 100644 index 00000000000..ae42dd494d1 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/github-action.rst @@ -0,0 +1,18 @@ +GitHub Action +============= + +.. 
contents:: |toctitle| + :local: + +Intro +----- + +A GitHub Action is under development that will spin up a Dataverse instance within the context of GitHub CI workflows: https://github.com/gdcc/dataverse-action + +Use Cases +--------- + +Use cases for the GitHub Action include: + +- Testing :doc:`/api/client-libraries` that interact with Dataverse APIs +- Testing :doc:`/admin/integrations` of third party software with Dataverse diff --git a/doc/sphinx-guides/source/container/running/index.rst b/doc/sphinx-guides/source/container/running/index.rst new file mode 100755 index 00000000000..a02266f7cba --- /dev/null +++ b/doc/sphinx-guides/source/container/running/index.rst @@ -0,0 +1,13 @@ +Running Dataverse in Docker +=========================== + +Contents: + +.. toctree:: + + production + demo + metadata-blocks + github-action + frontend-dev + backend-dev diff --git a/doc/sphinx-guides/source/container/running/metadata-blocks.rst b/doc/sphinx-guides/source/container/running/metadata-blocks.rst new file mode 100644 index 00000000000..fcc80ce1909 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/metadata-blocks.rst @@ -0,0 +1,15 @@ +Editing Metadata Blocks +======================= + +.. contents:: |toctitle| + :local: + +Intro +----- + +The Admin Guide has a section on :doc:`/admin/metadatacustomization` and suggests running Dataverse in containers (Docker) for this purpose. + +Status +------ + +For now, please see :doc:`demo`, which should also provide a suitable Dockerized Dataverse environment. diff --git a/doc/sphinx-guides/source/container/running/production.rst b/doc/sphinx-guides/source/container/running/production.rst new file mode 100644 index 00000000000..0a628dc57b9 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/production.rst @@ -0,0 +1,20 @@ +Production (Future) +=================== + +.. contents:: |toctitle| + :local: + +Status +------ + +The images described in this guide are not yet recommended for production usage. + +How to Help +----------- + +You can help the effort to support these images in production by trying them out (see :doc:`demo`) and giving feedback (see :ref:`helping-containers`). + +Alternatives +------------ + +Until the images are ready for production, please use the traditional installation method described in the :doc:`/installation/index`. diff --git a/doc/sphinx-guides/source/developers/api-design.rst b/doc/sphinx-guides/source/developers/api-design.rst new file mode 100755 index 00000000000..e7a7a6408bb --- /dev/null +++ b/doc/sphinx-guides/source/developers/api-design.rst @@ -0,0 +1,63 @@ +========== +API Design +========== + +API design is a large topic. We expect this page to grow over time. + +.. contents:: |toctitle| + :local: + +Paths +----- + +A reminder `from Wikipedia `_ of what a path is: + +.. code-block:: bash + + userinfo host port + ┌──┴───┠┌──────┴──────┠┌┴┠+ https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top + └─┬─┘ └─────────────┬────────────┘└───────┬───────┘ └────────────┬────────────┘ └┬┘ + scheme authority path query fragment + +Exposing Settings +~~~~~~~~~~~~~~~~~ + +Since Dataverse 4, database settings have been exposed via API at http://localhost:8080/api/admin/settings + +(JVM options are probably available via the Payara REST API, but this is out of scope.) + +Settings need to be exposed outside to API clients outside of ``/api/admin`` (which is typically restricted to localhost). Here are some guidelines to follow when exposing settings. 
+ +- When you are exposing a database setting as-is: + + - Use ``/api/info/settings`` as the root path. + + - Append the name of the setting including the colon (e.g. ``:DatasetPublishPopupCustomText``) + + - Final path example: ``/api/info/settings/:DatasetPublishPopupCustomText`` + +- If the absence of the database setting is filled in by a default value (e.g. ``:ZipDownloadLimit`` or ``:ApiTermsOfUse``): + + - Use ``/api/info`` as the root path. + + - Append the setting but remove the colon and downcase the first character (e.g. ``zipDownloadLimit``) + + - Final path example: ``/api/info/zipDownloadLimit`` + +- If the database setting you're exposing make more sense outside of ``/api/info`` because there's more context (e.g. ``:CustomDatasetSummaryFields``): + + - Feel free to use a path outside of ``/api/info`` as the root path. + + - Given additional context, append a shortened name (e.g. ``/api/datasets/summaryFieldNames``). + + - Final path example: ``/api/datasets/summaryFieldNames`` + +- If you need to expose a JVM option (MicroProfile setting) such as ``dataverse.api.allow-incomplete-metadata``: + + - Use ``/api/info`` as the root path. + + - Append a meaningful name for the setting (e.g. ``incompleteMetadataViaApi``). + + - Final path example: ``/api/info/incompleteMetadataViaApi`` + diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 04885571a01..8d891e63317 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -149,26 +149,39 @@ Globus File Transfer Note: Globus file transfer is still experimental but feedback is welcome! See :ref:`support`. -Users can transfer files via `Globus `_ into and out of datasets when their Dataverse installation is configured to use a Globus accessible S3 store and a community-developed `dataverse-globus `_ "transfer" app has been properly installed and configured. +Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) +and a community-developed `dataverse-globus `_ app has been properly installed and configured. -Due to differences in the access control models of a Dataverse installation and Globus, enabling the Globus capability on a store will disable the ability to restrict and embargo files in that store. +Globus endpoints can be in a variety of places, from data centers to personal computers. +This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa). -As Globus aficionados know, Globus endpoints can be in a variety of places, from data centers to personal computers. This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa). 
- -Globus transfer uses a very efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: +Globus transfer uses an efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: * robust file transfer capable of restarting after network or endpoint failures * third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation -Globus transfer requires use of the Globus S3 connector which requires a paid Globus subscription at the host institution. Users will need a Globus account which could be obtained via their institution or directly from Globus (at no cost). +Note: Due to differences in the access control models of a Dataverse installation and Globus and the current Globus store model, Dataverse cannot enforce per-file-access restrictions. +It is therefore recommended that a store be configured as public, which disables the ability to restrict and embargo files in that store, when Globus access is allowed. + +Dataverse supports three options for using Globus, two involving transfer to Dataverse-managed endpoints and one allowing Dataverse to reference files on remote endpoints. +Dataverse-managed endpoints must be Globus 'guest collections' hosted on either a file-system-based endpoint or an S3-based endpoint (the latter requires use of the Globus +S3 connector which requires a paid Globus subscription at the host institution). In either case, Dataverse is configured with the Globus credentials of a user account that can manage the endpoint. +Users will need a Globus account, which can be obtained via their institution or directly from Globus (at no cost). + +With the file-system endpoint, Dataverse does not currently have access to the file contents. Thus, functionality related to ingest, previews, fixity hash validation, etc. are not available. (Using the S3-based endpoint, Dataverse has access via S3 and all functionality normally associated with direct uploads to S3 is available.) + +For the reference use case, Dataverse must be configured with a list of allowed endpoint/base paths from which files may be referenced. In this case, since Dataverse is not accessing the remote endpoint itself, it does not need Globus credentials. +Users will need a Globus account in this case, and the remote endpoint must be configured to allow them access (i.e. be publicly readable, or potentially involving some out-of-band mechanism to request access (that could be described in the dataset's Terms of Use and Access). + +All of Dataverse's Globus capabilities are now store-based (see the store documentation) and therefore different collections/datasets can be configured to use different Globus-capable stores (or normal file, S3 stores, etc.) -The setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein. +More details of the setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein. As described in that document, Globus transfers can be initiated by choosing the Globus option in the dataset upload panel. (Globus, which does asynchronous transfers, is not available during dataset creation.) 
Analogously, "Globus Transfer" is one of the download options in the "Access Dataset" menu and optionally the file landing page download menu (if/when supported in the dataverse-globus app). An overview of the control and data transfer interactions between components was presented at the 2022 Dataverse Community Meeting and can be viewed in the `Integrations and Tools Session Video `_ around the 1 hr 28 min mark. -See also :ref:`Globus settings <:GlobusBasicToken>`. +See also :ref:`Globus settings <:GlobusSettings>`. Data Capture Module (DCM) ------------------------- diff --git a/doc/sphinx-guides/source/developers/classic-dev-env.rst b/doc/sphinx-guides/source/developers/classic-dev-env.rst index 062a1bb36f3..6978f389e01 100755 --- a/doc/sphinx-guides/source/developers/classic-dev-env.rst +++ b/doc/sphinx-guides/source/developers/classic-dev-env.rst @@ -46,7 +46,7 @@ On Linux, you are welcome to use the OpenJDK available from package managers. Install Netbeans or Maven ~~~~~~~~~~~~~~~~~~~~~~~~~ -NetBeans IDE is recommended, and can be downloaded from http://netbeans.org . Developers may use any editor or IDE. We recommend NetBeans because it is free, works cross platform, has good support for Jakarta EE projects, and includes a required build tool, Maven. +NetBeans IDE is recommended, and can be downloaded from https://netbeans.org . Developers may use any editor or IDE. We recommend NetBeans because it is free, works cross platform, has good support for Jakarta EE projects, and includes a required build tool, Maven. Below we describe how to build the Dataverse Software war file with Netbeans but if you prefer to use only Maven, you can find installation instructions in the :doc:`tools` section. @@ -86,7 +86,9 @@ On Mac, run this command: ``brew install jq`` -On Linux, install ``jq`` from your package manager or download a binary from http://stedolan.github.io/jq/ +On Linux, install ``jq`` from your package manager or download a binary from https://stedolan.github.io/jq/ + +.. _install-payara-dev: Install Payara ~~~~~~~~~~~~~~ @@ -134,7 +136,7 @@ On Linux, you should just install PostgreSQL using your favorite package manager Install Solr ^^^^^^^^^^^^ -`Solr `_ 9.3.0 is required. +`Solr `_ 9.3.0 is required. To install Solr, execute the following commands: @@ -144,7 +146,7 @@ To install Solr, execute the following commands: ``cd /usr/local/solr`` -``curl -O http://archive.apache.org/dist/solr/solr/9.3.0/solr-9.3.0.tgz`` +``curl -O https://archive.apache.org/dist/solr/solr/9.3.0/solr-9.3.0.tgz`` ``tar xvfz solr-9.3.0.tgz`` @@ -260,7 +262,3 @@ Next Steps If you can log in to the Dataverse installation, great! If not, please see the :doc:`troubleshooting` section. For further assistance, please see "Getting Help" in the :doc:`intro` section. You're almost ready to start hacking on code. Now that the installer script has you up and running, you need to continue on to the :doc:`tips` section to get set up to deploy code from your IDE or the command line. 
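+Before moving on, a quick check from the command line can confirm that the freshly deployed application is responding (a small sketch, assuming the default port):
+
+``curl http://localhost:8080/api/info/version``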
- ----- - -Previous: :doc:`intro` | Next: :doc:`tips` diff --git a/doc/sphinx-guides/source/developers/coding-style.rst b/doc/sphinx-guides/source/developers/coding-style.rst index 0c00f611a7f..9da7836bbf4 100755 --- a/doc/sphinx-guides/source/developers/coding-style.rst +++ b/doc/sphinx-guides/source/developers/coding-style.rst @@ -57,7 +57,7 @@ Place curly braces according to the style below, which is an example you can see Format Code You Changed with Netbeans ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -As you probably gathered from the :doc:`dev-environment` section, IQSS has standardized on Netbeans. It is much appreciated when you format your code (but only the code you touched!) using the out-of-the-box Netbeans configuration. If you have created an entirely new Java class, you can just click Source -> Format. If you are adjusting code in an existing class, highlight the code you changed and then click Source -> Format. Keeping the "diff" in your pull requests small makes them easier to code review. +IQSS has standardized on Netbeans. It is much appreciated when you format your code (but only the code you touched!) using the out-of-the-box Netbeans configuration. If you have created an entirely new Java class, you can just click Source -> Format. If you are adjusting code in an existing class, highlight the code you changed and then click Source -> Format. Keeping the "diff" in your pull requests small makes them easier to code review. Checking Your Formatting With Checkstyle ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -131,7 +131,3 @@ Bike Shedding What color should the `bike shed `_ be? :) Come debate with us about coding style in this Google doc that has public comments enabled: https://docs.google.com/document/d/1KTd3FpM1BI3HlBofaZjMmBiQEJtFf11jiiGpQeJzy7A/edit?usp=sharing - ----- - -Previous: :doc:`debugging` | Next: :doc:`deployment` diff --git a/doc/sphinx-guides/source/developers/containers.rst b/doc/sphinx-guides/source/developers/containers.rst index 175b178b455..ed477ccefea 100755 --- a/doc/sphinx-guides/source/developers/containers.rst +++ b/doc/sphinx-guides/source/developers/containers.rst @@ -29,7 +29,3 @@ Using Containers for Reproducible Research ------------------------------------------ Please see :ref:`research-code` in the User Guide for this related topic. - ----- - -Previous: :doc:`deployment` | Next: :doc:`making-releases` diff --git a/doc/sphinx-guides/source/developers/debugging.rst b/doc/sphinx-guides/source/developers/debugging.rst index 50e8901b1ff..ffee6764b7f 100644 --- a/doc/sphinx-guides/source/developers/debugging.rst +++ b/doc/sphinx-guides/source/developers/debugging.rst @@ -63,7 +63,3 @@ to maintain your settings more easily for different environments. .. _Jakarta Server Faces 3.0 Spec: https://jakarta.ee/specifications/faces/3.0/jakarta-faces-3.0.html#a6088 .. _PrimeFaces Configuration Docs: https://primefaces.github.io/primefaces/11_0_0/#/gettingstarted/configuration - ----- - -Previous: :doc:`documentation` | Next: :doc:`coding-style` diff --git a/doc/sphinx-guides/source/developers/dependencies.rst b/doc/sphinx-guides/source/developers/dependencies.rst index 0208c49f90a..26880374f23 100644 --- a/doc/sphinx-guides/source/developers/dependencies.rst +++ b/doc/sphinx-guides/source/developers/dependencies.rst @@ -444,7 +444,3 @@ The codebase is structured like this: .. [#f1] Modern IDEs import your Maven POM and offer import autocompletion for classes based on direct dependencies in the model. 
You might end up using legacy or repackaged classes because of a wrong scope. .. [#f2] This is going to bite back in modern IDEs when importing classes from transitive dependencies by "autocompletion accident". - ----- - -Previous: :doc:`documentation` | Next: :doc:`debugging` diff --git a/doc/sphinx-guides/source/developers/deployment.rst b/doc/sphinx-guides/source/developers/deployment.rst index 045b0d0abbc..678e29f4079 100755 --- a/doc/sphinx-guides/source/developers/deployment.rst +++ b/doc/sphinx-guides/source/developers/deployment.rst @@ -114,7 +114,7 @@ Please note that while the script should work well on new-ish branches, older br Migrating Datafiles from Local Storage to S3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -A number of pilot Dataverse installations start on local storage, then administrators are tasked with migrating datafiles into S3 or similar object stores. The files may be copied with a command-line utility such as `s3cmd`. You will want to retain the local file hierarchy, keeping the authority (for example: 10.5072) at the bucket "root." +A number of pilot Dataverse installations start on local storage, then administrators are tasked with migrating datafiles into S3 or similar object stores. The files may be copied with a command-line utility such as `s3cmd `_. You will want to retain the local file hierarchy, keeping the authority (for example: 10.5072) at the bucket "root." The below example queries may assist with updating dataset and datafile locations in the Dataverse installation's PostgresQL database. Depending on the initial version of the Dataverse Software and subsequent upgrade path, Datafile storage identifiers may or may not include a ``file://`` prefix, so you'll want to catch both cases. @@ -146,8 +146,3 @@ To Update Datafile Location to your-s3-bucket, Assuming no ``file://`` Prefix WHERE id IN (SELECT o.id FROM dvobject o, dataset s WHERE o.dtype = 'DataFile' AND s.id = o.owner_id AND s.harvestingclient_id IS null AND o.storageidentifier NOT LIKE '%://%'); - - ----- - -Previous: :doc:`coding-style` | Next: :doc:`containers` diff --git a/doc/sphinx-guides/source/developers/dev-environment.rst b/doc/sphinx-guides/source/developers/dev-environment.rst index 1301994cc82..2837f901d5e 100755 --- a/doc/sphinx-guides/source/developers/dev-environment.rst +++ b/doc/sphinx-guides/source/developers/dev-environment.rst @@ -71,10 +71,10 @@ After some time you should be able to log in: - username: dataverseAdmin - password: admin1 -More Information ----------------- +Next Steps +---------- -See also the :doc:`/container/dev-usage` section of the Container Guide. +See the :doc:`/container/dev-usage` section of the Container Guide for tips on fast redeployment, viewing logs, and more. Getting Help ------------ diff --git a/doc/sphinx-guides/source/developers/documentation.rst b/doc/sphinx-guides/source/developers/documentation.rst index f0729c59dcf..a4b8c027445 100755 --- a/doc/sphinx-guides/source/developers/documentation.rst +++ b/doc/sphinx-guides/source/developers/documentation.rst @@ -8,7 +8,7 @@ Writing Documentation Quick Fix ----------- -If you find a typo or a small error in the documentation you can fix it using GitHub's online web editor. Generally speaking, we will be following https://help.github.com/en/articles/editing-files-in-another-users-repository +If you find a typo or a small error in the documentation you can fix it using GitHub's online web editor. 
Generally speaking, we will be following https://docs.github.com/en/repositories/working-with-files/managing-files/editing-files#editing-files-in-another-users-repository - Navigate to https://github.com/IQSS/dataverse/tree/develop/doc/sphinx-guides/source where you will see folders for each of the guides: `admin`_, `api`_, `developers`_, `installation`_, `style`_, `user`_. - Find the file you want to edit under one of the folders above. @@ -18,7 +18,7 @@ If you find a typo or a small error in the documentation you can fix it using Gi - Under the **Write** tab, delete the long welcome message and write a few words about what you fixed. - Click **Create Pull Request**. -That's it! Thank you for your contribution! Your pull request will be added manually to the main Dataverse Project board at https://github.com/orgs/IQSS/projects/2 and will go through code review and QA before it is merged into the "develop" branch. Along the way, developers might suggest changes or make them on your behalf. Once your pull request has been merged you will be listed as a contributor at https://github.com/IQSS/dataverse/graphs/contributors +That's it! Thank you for your contribution! Your pull request will be added manually to the main Dataverse Project board at https://github.com/orgs/IQSS/projects/34 and will go through code review and QA before it is merged into the "develop" branch. Along the way, developers might suggest changes or make them on your behalf. Once your pull request has been merged you will be listed as a contributor at https://github.com/IQSS/dataverse/graphs/contributors Please see https://github.com/IQSS/dataverse/pull/5857 for an example of a quick fix that was merged (the "Files changed" tab shows how a typo was fixed). @@ -36,7 +36,7 @@ If you would like to read more about the Dataverse Project's use of GitHub, plea Building the Guides with Sphinx ------------------------------- -The Dataverse guides are written using Sphinx (http://sphinx-doc.org). We recommend installing Sphinx on your localhost or using a Sphinx Docker container to build the guides locally so you can get an accurate preview of your changes. +The Dataverse guides are written using Sphinx (https://sphinx-doc.org). We recommend installing Sphinx on your localhost or using a Sphinx Docker container to build the guides locally so you can get an accurate preview of your changes. In case you decide to use a Sphinx Docker container to build the guides, you can skip the next two installation sections, but you will need to have Docker installed. @@ -62,7 +62,7 @@ In some parts of the documentation, graphs are rendered as images using the Sphi Building the guides requires the ``dot`` executable from GraphViz. -This requires having `GraphViz `_ installed and either having ``dot`` on the path or +This requires having `GraphViz `_ installed and either having ``dot`` on the path or `adding options to the make call `_. Editing and Building the Guides @@ -71,7 +71,7 @@ Editing and Building the Guides To edit the existing documentation: - Create a branch (see :ref:`how-to-make-a-pull-request`). -- In ``doc/sphinx-guides/source`` you will find the .rst files that correspond to http://guides.dataverse.org. +- In ``doc/sphinx-guides/source`` you will find the .rst files that correspond to https://guides.dataverse.org. - Using your preferred text editor, open and edit the necessary files, or create new ones. Once you are done, you can preview the changes by building the guides locally. 
As explained, you can build the guides with Sphinx locally installed, or with a Docker container. @@ -159,7 +159,3 @@ A few notes about the command above: Also, as of this writing we have enabled PDF builds from the "develop" branch. You download the PDF from http://preview.guides.gdcc.io/_/downloads/en/develop/pdf/ If you would like to help improve the PDF version of the guides, please get in touch! Please see :ref:`getting-help-developers` for ways to contact the developer community. - ----- - -Previous: :doc:`testing` | Next: :doc:`dependencies` diff --git a/doc/sphinx-guides/source/developers/fontcustom.rst b/doc/sphinx-guides/source/developers/fontcustom.rst index 2a94b0ffc0b..edcda1e69ab 100755 --- a/doc/sphinx-guides/source/developers/fontcustom.rst +++ b/doc/sphinx-guides/source/developers/fontcustom.rst @@ -35,7 +35,7 @@ RVM is a good way to install a specific version of Ruby: https://rvm.io Install Dependencies and Font Custom Gem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The brew commands below assume you are on a Mac. See :doc:`dev-environment` for more on ``brew``. +The brew commands below assume you are on a Mac. .. code-block:: bash diff --git a/doc/sphinx-guides/source/developers/geospatial.rst b/doc/sphinx-guides/source/developers/geospatial.rst index 9744438bf5d..48d300524c2 100644 --- a/doc/sphinx-guides/source/developers/geospatial.rst +++ b/doc/sphinx-guides/source/developers/geospatial.rst @@ -34,7 +34,7 @@ For example: Upon recognition of the four required files, the Dataverse installation will group them as well as any other relevant files into a shapefile set. Files with these extensions will be included in the shapefile set: - Required: ``.shp``, ``.shx``, ``.dbf``, ``.prj`` -- Optional: ``.sbn``, ``.sbx``, ``.fbn``, ``.fbx``, ``.ain``, ``.aih``, ``.ixs``, ``.mxs``, ``.atx``, ``.cpg``, ``shp.xml`` +- Optional: ``.sbn``, ``.sbx``, ``.fbn``, ``.fbx``, ``.ain``, ``.aih``, ``.ixs``, ``.mxs``, ``.atx``, ``.cpg``, ``.qpj``, ``.qmd``, ``shp.xml`` Then the Dataverse installation creates a new ``.zip`` with mimetype as a shapefile. The shapefile set will persist as this new ``.zip``. @@ -81,7 +81,3 @@ For two "final" shapefile sets, ``bicycles.zip`` and ``subway_line.zip``, a new - Mimetype: ``application/zipped-shapefile`` - Mimetype Label: "Shapefile as ZIP Archive" - ----- - -Previous: :doc:`unf/index` | Next: :doc:`remote-users` diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst new file mode 100644 index 00000000000..834db8161f0 --- /dev/null +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -0,0 +1,239 @@ +Globus Transfer API +=================== + +.. contents:: |toctitle| + :local: + +The Globus API addresses three use cases: + +* Transfer to a Dataverse-managed Globus endpoint (File-based or using the Globus S3 Connector) +* Reference of files that will remain in a remote Globus endpoint +* Transfer from a Dataverse-managed Globus endpoint + +The ability for Dataverse to interact with Globus endpoints is configured via a Globus store - see :ref:`globus-storage`. + +Globus transfers (or referencing a remote endpoint) for upload and download transfers involve a series of steps. These can be accomplished using the Dataverse and Globus APIs. (These are used internally by the `dataverse-globus app `_ when transfers are done via the Dataverse UI.) 
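+The sections below use curl sketches. Unless a signed URL is being used, they assume a few environment variables along these lines (the values shown are placeholders):
+
+.. code-block:: bash
+
+   export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+   export SERVER_URL=https://demo.dataverse.org
+   export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV
+   export LOCALE=en-US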
+ +Requesting Upload or Download Parameters +---------------------------------------- + +The first step in preparing for a Globus transfer/reference operation is to request the parameters relevant for a given dataset: + +.. code-block:: bash + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/globusUploadParameters?locale=$LOCALE" + +The response will be of the form: + +.. code-block:: bash + + { + "status": "OK", + "data": { + "queryParameters": { + "datasetId": 29, + "siteUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com", + "datasetVersion": ":draft", + "dvLocale": "en", + "datasetPid": "doi:10.5072/FK2/ILLPXE", + "managed": "true", + "endpoint": "d8c42580-6528-4605-9ad8-116a61982644" + }, + "signedUrls": [ + { + "name": "requestGlobusTransferPaths", + "httpMethod": "POST", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/requestGlobusUploadPaths?until=2023-11-22T01:52:03.648&user=dataverseAdmin&method=POST&token=63ac4bb748d12078dded1074916508e19e6f6b61f64294d38e0b528010b07d48783cf2e975d7a1cb6d4a3c535f209b981c7c6858bc63afdfc0f8ecc8a139b44a", + "timeOut": 300 + }, + { + "name": "addGlobusFiles", + "httpMethod": "POST", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/addGlobusFiles?until=2023-11-22T01:52:03.648&user=dataverseAdmin&method=POST&token=2aaa03f6b9f851a72e112acf584ffc0758ed0cc8d749c5a6f8c20494bb7bc13197ab123e1933f3dde2711f13b347c05e6cec1809a8f0b5484982570198564025", + "timeOut": 300 + }, + { + "name": "getDatasetMetadata", + "httpMethod": "GET", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/versions/:draft?until=2023-11-22T01:52:03.649&user=dataverseAdmin&method=GET&token=1878d6a829cd5540e89c07bdaf647f1bea5314cc7a55433b0b506350dd330cad61ade3714a8ee199a7b464fb3b8cddaea0f32a89ac3bfc4a86cd2ea3004ecbb8", + "timeOut": 300 + }, + { + "name": "getFileListing", + "httpMethod": "GET", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/versions/:draft/files?until=2023-11-22T01:52:03.650&user=dataverseAdmin&method=GET&token=78e8ca8321624f42602af659227998374ef3788d0feb43d696a0e19086e0f2b3b66b96981903a1565e836416c504b6248cd3c6f7c2644566979bd16e23a99622", + "timeOut": 300 + } + ] + } + } + +The response includes the id for the Globus endpoint to use along with several signed URLs. + +The getDatasetMetadata and getFileListing URLs are just signed versions of the standard Dataset metadata and file listing API calls. The other two are Globus specific. + +If called for, a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the response includes a +the "managed" parameter will be false, the "endpoint" parameter is replaced with a JSON array of "referenceEndpointsWithPaths" and the +requestGlobusTransferPaths and addGlobusFiles URLs are replaced with ones for requestGlobusReferencePaths and addFiles. All of these calls are +described further below. + +The call to set up for a transfer out (download) is similar: + +.. code-block:: bash + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/globusDownloadParameters?locale=$LOCALE" + +Note that this API call supports an additional downloadId query parameter. This is only used when the globus-dataverse app is called from the Dataverse user interface. There is no need to use it when calling the API directly. 
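+If you are scripting against these calls, the signed URLs can be extracted from the JSON response with ``jq``, for example (a sketch; the name matches one of the ``signedUrls`` entries shown above):
+
+.. code-block:: bash
+
+   curl -s -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/globusUploadParameters?locale=$LOCALE" \
+     | jq -r '.data.signedUrls[] | select(.name == "requestGlobusTransferPaths") | .signedUrl'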
+ +The returned response includes the same getDatasetMetadata and getFileListing URLs as in the upload case and includes "monitorGlobusDownload" and "requestGlobusDownload" URLs. The response will also indicate whether the store is "managed" and will provide the "endpoint" from which downloads can be made. + + +Performing an Upload/Transfer In +-------------------------------- + +The information from the API call above can be used to provide a user with information about the dataset and to prepare to transfer (managed=true) or to reference files (managed=false). + +Once the user identifies which files are to be added, the requestGlobusTransferPaths or requestGlobusReferencePaths URLs can be called. These both reference the same API call but must be used with different entries in the JSON body sent: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export LOCALE=en-US + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusUploadPaths" + +Note that when using the dataverse-globus app or the return from the previous call, the URL for this call will be signed and no API_TOKEN is needed. + +In the managed case, the JSON body sent must include the id of the Globus user that will perform the transfer and the number of files that will be transferred: + +.. code-block:: bash + + { + "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", + "numberOfFiles":2 + } + +In the remote reference case, the JSON body sent must include the Globus endpoint/paths that will be referenced: + +.. code-block:: bash + + { + "referencedFiles":[ + "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" + ] + } + +The response will include a JSON object. In the managed case, the map is from new assigned file storageidentifiers and specific paths on the managed Globus endpoint: + +.. code-block:: bash + + { + "status":"OK", + "data":{ + "globusm://18b49d3688c-62137dcb06e4":"/hdc1/10.5072/FK2/ILLPXE/18b49d3688c-62137dcb06e4", + "globusm://18b49d3688c-5c17d575e820":"/hdc1/10.5072/FK2/ILLPXE/18b49d3688c-5c17d575e820" + } + } + +In the managed case, the specified Globus principal is granted write permission to the specified endpoint/path, +which will allow initiation of a transfer from the external endpoint to the managed endpoint using the Globus API. +The permission will be revoked if the transfer is not started and the next call to Dataverse to finish the transfer are not made within a short time (configurable, default of 5 minutes). + +In the remote/reference case, the map is from the initially supplied endpoint/paths to the new assigned file storageidentifiers: + +.. code-block:: bash + + { + "status":"OK", + "data":{ + "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt":"globus://18bf8c933f4-ed2661e7d19b//d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" + } + } + + + +Adding Files to the Dataset +--------------------------- + +In the managed case, you must initiate a Globus transfer and take note of its task identifier. As in the JSON example below, you will pass it as ``taskIdentifier`` along with details about the files you are transferring: + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export JSON_DATA='{"taskIdentifier":"3f530302-6c48-11ee-8428-378be0d9c521", \ + "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ + {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}' + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles" -F "jsonData=$JSON_DATA" + +Note that the mimetype is multipart/form-data, matching the /addFiles API call. Also note that the API_TOKEN is not needed when using a signed URL. + +With this information, Dataverse will begin to monitor the transfer and when it completes, will add all files for which the transfer succeeded. +As the transfer can take significant time and the API call is asynchronous, the only way to determine if the transfer succeeded via API is to use the standard calls to check the dataset lock state and contents. + +Once the transfer completes, Dataverse will remove the write permission for the principal. + +Note that when using a managed endpoint that uses the Globus S3 Connector, the checksum should be correct as Dataverse can validate it. For file-based endpoints, the checksum should be included if available but Dataverse cannot verify it. + +In the remote/reference case, where there is no transfer to monitor, the standard /addFiles API call (see :ref:`direct-add-to-dataset-api`) is used instead. There are no changes for the Globus case. + +Downloading/Transfer Out Via Globus +----------------------------------- + +To begin downloading files, the requestGlobusDownload URL is used: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusDownload" + +The JSON body sent should include a list of file ids to download and, for a managed endpoint, the Globus principal that will make the transfer: + +.. code-block:: bash + + { + "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", + "fileIds":[60, 61] + } + +Note that this API call takes an optional downloadId parameter that is used with the dataverse-globus app. When downloadId is included, the list of fileIds is not needed. + +The response is a JSON object mapping the requested file Ids to Globus endpoint/paths. In the managed case, the principal will have been given read permissions for the specified paths: + +.. code-block:: bash + + { + "status":"OK", + "data":{ + "60": "d8c42580-6528-4605-9ad8-116a61982644/hdc1/10.5072/FK2/ILLPXE/18bf3af9c78-92b8e168090e", + "61": "d8c42580-6528-4605-9ad8-116a61982644/hdc1/10.5072/FK2/ILLPXE/18bf3af9c78-c8d81569305c" + } + } + +For the remote case, the use can perform the transfer without further contact with Dataverse. 
In the managed case, the user must initiate the transfer via the Globus API and then inform Dataverse. +Dataverse will then monitor the transfer and revoke the read permission when the transfer is complete. (Not making this last call could result in failure of the transfer.) + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/monitorGlobusDownload" + +The JSON body sent just contains the task identifier for the transfer: + +.. code-block:: bash + + { + "taskIdentifier":"b5fd01aa-8963-11ee-83ae-d5484943e99a" + } + + diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 3ac9e955ea2..25fea138736 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -19,7 +19,9 @@ Developer Guide sql-upgrade-scripts testing documentation + api-design security + performance dependencies debugging coding-style @@ -38,6 +40,7 @@ Developer Guide big-data-support aux-file-support s3-direct-upload-api + globus-api dataset-semantic-metadata-api dataset-migration-api workflows diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst index 4a64c407fc1..350968012d8 100755 --- a/doc/sphinx-guides/source/developers/intro.rst +++ b/doc/sphinx-guides/source/developers/intro.rst @@ -2,7 +2,7 @@ Introduction ============ -Welcome! `The Dataverse Project `_ is an `open source `_ project that loves `contributors `_! +Welcome! `The Dataverse Project `_ is an `open source `_ project that loves `contributors `_! .. contents:: |toctitle| :local: @@ -19,7 +19,7 @@ To get started, you'll want to set up your :doc:`dev-environment` and make sure Getting Help ------------ -If you have any questions at all, please reach out to other developers via the channels listed in https://github.com/IQSS/dataverse/blob/develop/CONTRIBUTING.md such as http://chat.dataverse.org, the `dataverse-dev `_ mailing list, `community calls `_, or support@dataverse.org. +If you have any questions at all, please reach out to other developers via the channels listed in https://github.com/IQSS/dataverse/blob/develop/CONTRIBUTING.md such as https://chat.dataverse.org, the `dataverse-dev `_ mailing list, `community calls `_, or support@dataverse.org. .. _core-technologies: @@ -37,10 +37,12 @@ Roadmap For the Dataverse Software development roadmap, please see https://www.iq.harvard.edu/roadmap-dataverse-project +.. _kanban-board: + Kanban Board ------------ -You can get a sense of what's currently in flight (in dev, in QA, etc.) by looking at https://github.com/orgs/IQSS/projects/2 +You can get a sense of what's currently in flight (in dev, in QA, etc.) 
by looking at https://github.com/orgs/IQSS/projects/34 Issue Tracker ------------- @@ -73,7 +75,3 @@ As a developer, you also may be interested in these projects related to Datavers - Third party apps - make use of Dataverse installation APIs: :doc:`/api/apps` - chat.dataverse.org - chat interface for Dataverse Project users and developers: https://github.com/IQSS/chat.dataverse.org - [Your project here] :) - ----- - -Next: :doc:`dev-environment` diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index 23c4773a06e..e7a59910e56 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -14,16 +14,19 @@ See :doc:`version-control` for background on our branching strategy. The steps below describe making both regular releases and hotfix releases. +.. _write-release-notes: + Write Release Notes ------------------- -Developers express the need for an addition to release notes by creating a file in ``/doc/release-notes`` containing the name of the issue they're working on. The name of the branch could be used for the filename with ".md" appended (release notes are written in Markdown) such as ``5053-apis-custom-homepage.md``. +Developers express the need for an addition to release notes by creating a "release note snippet" in ``/doc/release-notes`` containing the name of the issue they're working on. The name of the branch could be used for the filename with ".md" appended (release notes are written in Markdown) such as ``5053-apis-custom-homepage.md``. See :ref:`writing-release-note-snippets` for how this is described for contributors. -The task at or near release time is to collect these notes into a single doc. +The task at or near release time is to collect these snippets into a single file. - Create an issue in GitHub to track the work of creating release notes for the upcoming release. -- Create a branch, add a .md file for the release (ex. 5.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the issue-specific release notes mentioned above. -- Delete the previously-created, issue-specific release notes as the content is added to the main release notes file. +- Create a branch, add a .md file for the release (ex. 5.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the release note snippets mentioned above. +- Delete the release note snippets as the content is added to the main release notes file. +- Include instructions to describe the steps required to upgrade the application from the previous version. These must be customized for release numbers and special circumstances such as changes to metadata blocks and infrastructure. - Take the release notes .md through the regular Code Review and QA process. Create a GitHub Issue and Branch for the Release @@ -67,6 +70,21 @@ Once important tests have passed (compile, unit tests, etc.), merge the pull req If this is a hotfix release, skip this whole "merge develop to master" step (the "develop" branch is not involved until later). 
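+
+The merge itself is normally done by merging the release pull request in the GitHub UI. If it ever has to be done from the command line, it amounts to something like the following sketch (assuming ``origin`` points at the IQSS/dataverse repository; this is only an illustration, not the documented process):
+
+.. code-block:: bash
+
+    git checkout master
+    git pull origin master
+    git merge --no-ff develop
+    git push origin master
+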
+(Optional) Test Docker Images +----------------------------- + +After the "master" branch has been updated and the GitHub Action to build and push Docker images has run (see `PR #9776 `_), go to https://hub.docker.com/u/gdcc and make sure the "alpha" tag for the following images has been updated: + +- https://hub.docker.com/r/gdcc/base +- https://hub.docker.com/r/gdcc/dataverse +- https://hub.docker.com/r/gdcc/configbaker + +To test these images against our API test suite, go to the "alpha" workflow at https://github.com/gdcc/api-test-runner/actions/workflows/alpha.yml and run it. + +If there are failures, additional dependencies or settings may have been added to the "develop" workflow. Copy them over and try again. + +.. _build-guides: + Build the Guides for the Release -------------------------------- @@ -112,9 +130,11 @@ Go to https://jenkins.dataverse.org/job/IQSS_Dataverse_Internal/ and make the fo Click "Save" then "Build Now". -The build number will appear in ``/api/info/version`` (along with the commit mentioned above) from a running installation (e.g. ``{"version":"5.10.1","build":"907-b844672``). +This will build the war file, and then automatically deploy it on dataverse-internal. Verify that the application has deployed successfully. -Note that the build number comes from script in an early build step... +The build number will appear in ``/api/info/version`` (along with the commit mentioned above) from a running installation (e.g. ``{"version":"5.10.1","build":"907-b844672``). + +Note that the build number comes from the following script in an early Jenkins build step... .. code-block:: bash @@ -129,12 +149,16 @@ Build Installer (dvinstall.zip) ssh into the dataverse-internal server and do the following: - In a git checkout of the dataverse source switch to the master branch and pull the latest. -- Copy the war file from the previous step to the ``target`` directory in the root of the repo (create it, if necessary). +- Copy the war file from the previous step to the ``target`` directory in the root of the repo (create it, if necessary): +- ``mkdir target`` +- ``cp /tmp/dataverse-5.10.1.war target`` - ``cd scripts/installer`` - ``make`` A zip file called ``dvinstall.zip`` should be produced. +Alternatively, you can build the installer on your own dev. instance. But make sure you use the war file produced in the step above, not a war file build from master on your own system! That's because we want the released application war file to contain the build number described above. Download the war file directly from Jenkins, or from dataverse-internal. + Make Artifacts Available for Download ------------------------------------- @@ -148,6 +172,11 @@ Upload the following artifacts to the draft release you created: - metadata block tsv files - config files +Deploy on Demo +-------------- + +Now that you have the release ready to go, give it one final test by deploying it on https://demo.dataverse.org . Note that this is also an opportunity to re-test the upgrade checklist as described in the release note. + Publish the Release ------------------- @@ -158,7 +187,14 @@ Update Guides Link "latest" at https://guides.dataverse.org/en/latest/ is a symlink to the directory with the latest release. That directory (e.g. ``5.10.1``) was put into place by the Jenkins "guides" job described above. -ssh into the guides server and update the symlink to point to the latest release. +ssh into the guides server and update the symlink to point to the latest release, as in the example below. + +.. 
code-block:: bash + + cd /var/www/html/en + ln -s 5.10.1 latest + + + Close Milestone on GitHub and Create a New One ---------------------------------------------- @@ -194,7 +230,3 @@ We've merged the hotfix into the "master" branch but now we need the fixes (and Because of the hotfix version, any SQL scripts in "develop" should be renamed (from "5.11.0" to "5.11.1" for example). To read more about our naming conventions for SQL scripts, see :doc:`sql-upgrade-scripts`. Please note that version bumps and SQL script renaming both require all open pull requests to be updated with the latest from the "develop" branch so you might want to add any SQL script renaming to the hotfix branch before you put it through QA to be merged with develop. This way, open pull requests only need to be updated once. - ----- - -Previous: :doc:`containers` | Next: :doc:`tools` diff --git a/doc/sphinx-guides/source/developers/performance.rst b/doc/sphinx-guides/source/developers/performance.rst new file mode 100644 index 00000000000..46c152f322e --- /dev/null +++ b/doc/sphinx-guides/source/developers/performance.rst @@ -0,0 +1,196 @@ +Performance +=========== + +`Performance is a feature `_ was a mantra when Stack Overflow was being developed. We endeavor to do the same with Dataverse! + +In this section we collect ideas and share practices for improving performance. + +.. contents:: |toctitle| + :local: + +Problem Statement +----------------- + +Performance has always been important to the Dataverse Project, but results have been uneven. We've seen enough success in the marketplace that performance must be adequate, but internally we sometimes refer to Dataverse as a pig. 🐷 + +Current Practices +----------------- + +We've adopted a number of practices to help us maintain our current level of performance and most should absolutely continue in some form, but challenges mentioned throughout should be addressed to further improve performance. + +Cache When You Can +~~~~~~~~~~~~~~~~~~ + +The Metrics API, for example, caches values for 7 days by default. We took a look at JSR 107 (JCache - Java Temporary Caching API) in `#2100 `_. We're aware of the benefits of caching. + +Use Async +~~~~~~~~~ + +We index datasets (and all objects) asynchronously. That is, we let changes persist in the database and afterward copy the data into Solr. + +Use a Queue +~~~~~~~~~~~ + +We use a JMS queue when ingesting tabular files. We've talked about adding a queue (even `an external queue `_) for indexing, DOI registration, and other services. + +Offload Expensive Operations Outside the App Server +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When operations are computationally expensive, we have realized performance gains by offloading them to systems outside of the core code. For example, rather than having files pass through our application server when they are downloaded, we use direct download so that client machines download files directly from S3. (We use the same trick with upload.) When a client downloads multiple files, rather than zipping them within the application server as before, we now have a separate "zipper" process that does this work out of band. + +Drop to Raw SQL as Necessary +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We aren't shy about writing raw SQL queries when necessary. We've written `querycount `_  scripts to help identify problematic queries and mention the slow query log at :doc:`/admin/monitoring`.
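+
+As a concrete illustration of the slow query log mentioned above, PostgreSQL can be asked to log any statement that runs longer than a chosen threshold. This is only a sketch, assuming a local superuser connection and the usual ``dvndb`` database name:
+
+.. code-block:: bash
+
+    # log every statement that takes longer than one second
+    psql -d dvndb -c "ALTER SYSTEM SET log_min_duration_statement = '1s';"
+    # reload the configuration so the new setting takes effect
+    psql -d dvndb -c "SELECT pg_reload_conf();"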
+ +Add Indexes to Database Tables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There was a concerted effort in `#1880 `_ to add indexes to a large number of columns, but it's something we're mindful of, generally. Perhaps we could use some better detection of when indexes would be valuable. + +Find Bottlenecks with a Profiler +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +VisualVM is popular and bundled with Netbeans. Many options are available including `JProfiler `_. + +Warn Developers in Code Comments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For code that has been optimized for performance, warnings are sometimes inserted in the form of comments for future developers to prevent backsliding. + +Write Docs for Devs about Perf +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Like this doc. :) + +Sometimes perf is written about in other places, such as :ref:`avoid-efficiency-issues-with-render-logic-expressions`. + +Horizontal Scaling of App Server +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We've made it possible to run more than one application server, though it requires some special configuration. This way load can be spread out across multiple servers. For details, see :ref:`multiple-app-servers` in the Installation Guide. + +Code Review and QA +~~~~~~~~~~~~~~~~~~ + +Before code is merged, while it is in review or QA, if a performance problem is detected (usually on an ad hoc basis), the code is returned to the developer for improvement. Developers and reviewers typically do not have many tools at their disposal to test code changes against anything close to production data. QA maintains a machine with a copy of production data but tests against smaller data unless a performance problem is suspected. + +A new QA guide is coming in https://github.com/IQSS/dataverse/pull/10103 + +Locust Testing at Release Time +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As one of the final steps in preparing for a release, QA runs performance tests using a tool called Locust as explained the Developer Guide (see :ref:`locust`). The tests are not comprehensive, testing only a handful of pages with anonymous users, but they increase confidence that the upcoming release is not drastically slower than previous releases. + +Issue Tracking and Prioritization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Performance issues are tracked in our issue tracker under the `Feature: Performance & Stability `_ label (e.g. `#7788 `_). That way, we can track performance problems throughout the application. Unfortunately, the pain is often felt by users in production before we realize there is a problem. As needed, performance issues are prioritized to be included in a sprint, to \ `speed up the collection page `_, for example. + +Document Performance Tools +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the :doc:`/admin/monitoring` page section of the Admin Guide we describe how to set up Munin for monitoring performance of an operating system. We also explain how to set up Performance Insights to monitor AWS RDS (PostgreSQL as a service, in our case). In the :doc:`/developers/tools` section of the Developer Guide, we have documented how to use Eclipse Memory Analyzer Tool (MAT), SonarQube, jmap, and jstat. + +Google Analytics +~~~~~~~~~~~~~~~~ + +Emails go to a subset of the team monthly with subjects like "Your September Search performance for https://dataverse.harvard.edu" with a link to a report but it's mostly about the number clicks, not how fast the site is. It's unclear if it provides any value with regard to performance. 
+ +Abandoned Tools and Practices +----------------------------- + +New Relic +~~~~~~~~~ + +For many years Harvard Dataverse was hooked up to New Relic, a tool that promises all-in-one observability, according to their `website `_. In practice, we didn't do much with `the data `_. + +Areas of Particular Concern +--------------------------- + +Command Engine Execution Rate Metering +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We'd like to rate limit commands (CreateDataset, etc.) so that we can keep them at a reasonable level (`#9356 `_). This is similar to how many APIs are rate limited, such as the GitHub API. + +Solr +~~~~ + +While in the past Solr performance hasn't been much of a concern, in recent years we've noticed performance problems when Harvard Dataverse is under load. Improvements were made in `PR #10050 `_, for example. + +Datasets with Large Numbers of Files or Versions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We'd like to scale Dataverse to better handle large number of files or versions. Progress was made in `PR #9883 `_. + +Withstanding Bots +~~~~~~~~~~~~~~~~~ + +Google bot, etc. + +Suggested Practices +------------------- + +Many of our current practices should remain in place unaltered. Others could use some refinement. Some new practices should be adopted as well. Here are some suggestions. + +Implement the Frontend Plan for Performance +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The `Dataverse - SPA MVP Definition doc `_  has some ideas around how to achieve good performance for the new front end in the areas of rendering, monitoring,file upload/download, pagination, and caching. We should create as many issues as necessary in the frontend repo and work on them in time. The doc recommends the use of `React Profiler `_ and other tools. Not mentioned is https://pagespeed.web.dev but we can investigate it as well. See also `#183 `_, a parent issue about performance. In `#184 `_  we plan to compare the performance of the old JSF UI vs. the new React UI. Cypress plugins for load testing could be investigated. + +Set up Query Counter in Jenkins +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +See countquery script above. See also https://jenkins.dataverse.org/job/IQSS-dataverse-develop/ws/target/query_count.out + +Show the plot over time. Make spikes easily apparent. 320,035 queries as of this writing. + +Count Database Queries per API Test +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Is it possible? Just a thought. + +Teach Developers How to Do Performance Testing Locally +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Do developers know how to use a profiler? Should they use `JMeter `_? `statsd-jvm-profiler `_? How do you run our :ref:`locust` tests? Should we continue using that tool? Give developers time and space to try out tools and document any tips along the way. For this stage, small data is fine. + +Automate Performance Testing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We are already using two excellent continuous integration (CI) tools, Jenkins and GitHub Actions, to test our code. We should add performance testing into the mix (`#4201 `_ is an old issue for this but we can open a fresh one). Currently we test every commit on every PR and we should consider if this model makes sense since performance testing will likely take longer to run than regular tests. Once developers are comfortable with their favorite tools, we can pick which ones to automate. 
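+
+As a starting point, a scheduled CI job could run a short, headless Locust smoke test against a test server. The sketch below is illustrative only; the host name and locustfile path are placeholders rather than part of our current setup:
+
+.. code-block:: bash
+
+    pip install locust
+    # ten simulated users for five minutes against a non-production host
+    locust --headless --users 10 --spawn-rate 2 --run-time 5m \
+        --host https://beta.example.edu -f locustfile.py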
+ +Make Production Data or Equivalent Available to Developers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If developers are only testing small amounts of data on their laptops, it's hard to detect performance problems. Not every bug fix requires access to data similar to production, but it should be made available. This is not a trivial task! If we are to use actual production data, we need to be very careful to de-identify it. If we start with our `sample-data `_  repo instead, we'll need to figure out how to make sure we cover cases like many files, many versions, etc. + +Automate Performance Testing with Production Data or Equivalent +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Hopefully the environment developers use with production data or equivalent can be made available to our CI tools. Perhaps these tests don't need to be run on every commit to every pull request, but they should be run regularly. + +Use Monitoring as Performance Testing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Monitoring can be seen as a form of testing. How long is a round trip ping to production? What is the Time to First Byte? First Contentful Paint? Largest Contentful Paint? Time to Interactive? We now have a beta server that we could monitor continuously to know if our app is getting faster or slower over time. Should our monitoring of production servers be improved? + +Learn from Training and Conferences +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Most likely there is training available that is oriented toward performance. The subject of performance often comes up at conferences as well. + +Learn from the Community How They Monitor Performance +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some members of the Dataverse community are likely users of newish tools like the ELK stack (Elasticsearch, Logstash, and Kibana), the TICK stack (Telegraph InfluxDB Chronograph and Kapacitor), GoAccess, Prometheus, Graphite, and more we haven't even heard of. In the :doc:`/admin/monitoring` section of the Admin Guide, we already encourage the community to share findings, but we could dedicate time to this topic at our annual meeting or community calls. + +Teach the Community to Do Performance Testing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We have a worldwide community of developers. We should do what we can in the form of documentation and other resources to help them develop performant code. + +Conclusion +---------- + +Given its long history, Dataverse has encountered many performance problems over the years. The core team is conversant in how to make the app more performant, but investment in learning additional tools and best practices would likely yield dividends. We should automate our performance testing, catching more problems before code is merged. diff --git a/doc/sphinx-guides/source/developers/remote-users.rst b/doc/sphinx-guides/source/developers/remote-users.rst index d8f90e9257f..38b3edab772 100755 --- a/doc/sphinx-guides/source/developers/remote-users.rst +++ b/doc/sphinx-guides/source/developers/remote-users.rst @@ -39,7 +39,7 @@ STOP! ``oidc-keycloak-auth-provider.json`` was changed from http://localhost:809 If you are working on the OpenID Connect (OIDC) user authentication flow, you do not need to connect to a remote provider (as explained in :doc:`/installation/oidc`) to test this feature. 
Instead, you can use the available configuration that allows you to run a test Keycloak OIDC identity management service locally through a Docker container. -(Please note! The client secret (``ss6gE8mODCDfqesQaSG3gwUwZqZt547E``) is hard-coded in ``oidc-realm.json`` and ``oidc-keycloak-auth-provider.json``. Do not use this config in production! This is only for developers.) +(Please note! The client secret (``94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8``) is hard-coded in ``test-realm.json`` and ``oidc-keycloak-auth-provider.json``. Do not use this config in production! This is only for developers.) You can find this configuration in ``conf/keycloak``. There are two options available in this directory to run a Keycloak container: bash script or docker-compose. @@ -55,15 +55,23 @@ Now load the configuration defined in ``oidc-keycloak-auth-provider.json`` into You should see the new provider, called "OIDC-Keycloak", under "Other options" on the Log In page. -You should be able to log into Keycloak with the following credentials: +You should be able to log into Keycloak with the one of the following credentials: -- username: kcuser -- password: kcpassword +.. list-table:: + + * - Username + - Password + * - admin + - admin + * - curator + - curator + * - user + - user + * - affiliate + - affiliate In case you want to stop and remove the Keycloak container, just run the other available bash script: ``./rm-keycloak.sh`` ----- - -Previous: :doc:`unf/index` | Next: :doc:`geospatial` +Note: the Keycloak admin to login at the admin console is ``kcadmin:kcpassword`` diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst index 4d323455d28..1cb9ae9e6db 100644 --- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst +++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst @@ -3,6 +3,12 @@ Direct DataFile Upload/Replace API The direct Datafile Upload API is used internally to support direct upload of files to S3 storage and by tools such as the DVUploader. +.. contents:: |toctitle| + :local: + +Overview +-------- + Direct upload involves a series of three activities, each involving interacting with the server for a Dataverse installation: * Requesting initiation of a transfer from the server @@ -69,8 +75,9 @@ In the single part case, only one call to the supplied URL is required: .. code-block:: bash - curl -H 'x-amz-tagging:dv-state=temp' -X PUT -T "" + curl -i -H 'x-amz-tagging:dv-state=temp' -X PUT -T "" +Note that without the ``-i`` flag, you should not expect any output from the command above. With the ``-i`` flag, you should expect to see a "200 OK" response. In the multipart case, the client must send each part and collect the 'eTag' responses from the server. The calls for this are the same as the one for the single part case except that each call should send a slice of the total file, with the last part containing the remaining bytes. The responses from the S3 server for these calls will include the 'eTag' for the uploaded part. @@ -90,7 +97,7 @@ If the client is unable to complete the multipart upload, it should call the abo .. _direct-add-to-dataset-api: -Adding the Uploaded file to the Dataset +Adding the Uploaded File to the Dataset --------------------------------------- Once the file exists in the s3 bucket, a final API call is needed to add it to the Dataset. 
This call is the same call used to upload a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. @@ -115,10 +122,10 @@ The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.Data curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" -Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. +Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. -To add multiple Uploaded Files to the Dataset +To Add Multiple Uploaded Files to the Dataset --------------------------------------------- Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. @@ -146,11 +153,10 @@ The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.Data curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" -Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. - +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. -Replacing an existing file in the Dataset +Replacing an Existing File in the Dataset ----------------------------------------- Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. 
This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. @@ -176,10 +182,10 @@ Note that the API call does not validate that the file matches the hash value su curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/files/$FILE_IDENTIFIER/replace" -F "jsonData=$JSON_DATA" -Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. +Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. -Replacing multiple existing files in the Dataset +Replacing Multiple Existing Files in the Dataset ------------------------------------------------ Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. @@ -274,5 +280,5 @@ The JSON object returned as a response from this API call includes a "data" that } -Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. 
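+
+After any of the out-of-band additions or replacements described above, one way to sanity-check the result is to list the files in the latest version of the dataset and confirm that the new storage identifiers appear. This is only a sketch, assuming an API token with access to the (draft) dataset:
+
+.. code-block:: bash
+
+    export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+    export SERVER_URL=https://demo.dataverse.org
+    export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV
+
+    curl -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/versions/:latest/files?persistentId=$PERSISTENT_IDENTIFIER"
+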
diff --git a/doc/sphinx-guides/source/developers/selinux.rst b/doc/sphinx-guides/source/developers/selinux.rst index dcbf3ee594f..ca41ab82d25 100644 --- a/doc/sphinx-guides/source/developers/selinux.rst +++ b/doc/sphinx-guides/source/developers/selinux.rst @@ -109,7 +109,3 @@ Once your updated SELinux rules are in place, try logging in with Shibboleth aga Keep iterating until it works and then create a pull request based on your updated file. Good luck! Many thanks to Bill Horka from IQSS for his assistance in explaining how to construct a SELinux Type Enforcement (TE) file! - ----- - -Previous: :doc:`geospatial` diff --git a/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst b/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst index bace682b1b8..32c465524b0 100644 --- a/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst +++ b/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst @@ -21,12 +21,14 @@ If you are creating a new database table (which maps to an ``@Entity`` in JPA), If you are doing anything other than creating a new database table such as adding a column to an existing table, you must create or update a SQL upgrade script. +.. _create-sql-script: + How to Create a SQL Upgrade Script ---------------------------------- We assume you have already read the :doc:`version-control` section and have been keeping your feature branch up to date with the "develop" branch. -Create a new file called something like ``V4.11.0.1__5565-sanitize-directory-labels.sql`` in the ``src/main/resources/db/migration`` directory. Use a version like "4.11.0.1" in the example above where the previously released version was 4.11, ensuring that the version number is unique. Note that this is not the version that you expect the code changes to be included in (4.12 in this example). When the previously released version is a patch version (e.g. 5.10.1), use "5.10.1.1" for the first SQL script version (rather than "5.10.1.0.1"). For the "description" you should the name of your branch, which should include the GitHub issue you are working on, as in the example above. To read more about Flyway file naming conventions, see https://flywaydb.org/documentation/migrations#naming +Create a new file called something like ``V4.11.0.1__5565-sanitize-directory-labels.sql`` in the ``src/main/resources/db/migration`` directory. Use a version like "4.11.0.1" in the example above where the previously released version was 4.11, ensuring that the version number is unique. Note that this is not the version that you expect the code changes to be included in (4.12 in this example). When the previously released version is a patch version (e.g. 5.10.1), use "5.10.1.1" for the first SQL script version (rather than "5.10.1.0.1"). For the "description" you should the name of your branch, which should include the GitHub issue you are working on, as in the example above. To read more about Flyway file naming conventions, see https://documentation.red-gate.com/fd/migrations-184127470.html The SQL migration script you wrote will be part of the war file and executed when the war file is deployed. To see a history of Flyway database migrations that have been applied, look at the ``flyway_schema_history`` table. 
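+
+For example, a quick way to see which migrations have already been applied (a sketch, assuming the standard ``dvndb`` database and a role that can read it):
+
+.. code-block:: bash
+
+    psql -d dvndb -c "SELECT version, description, installed_on, success FROM flyway_schema_history ORDER BY installed_rank;"
+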
@@ -41,7 +43,3 @@ Renaming SQL Upgrade Scripts Please note that if you need to rename your script (because a new version of the Dataverse Software was released, for example), you will see the error "FlywayException: Validate failed: Detected applied migration not resolved locally" when you attempt to deploy and deployment will fail. To resolve this problem, delete the old migration from the ``flyway_schema_history`` table and attempt to redeploy. - ----- - -Previous: :doc:`version-control` | Next: :doc:`testing` diff --git a/doc/sphinx-guides/source/developers/testing.rst b/doc/sphinx-guides/source/developers/testing.rst index acaeccf4f23..2ea85913d42 100755 --- a/doc/sphinx-guides/source/developers/testing.rst +++ b/doc/sphinx-guides/source/developers/testing.rst @@ -5,7 +5,7 @@ Testing In order to keep our codebase healthy, the Dataverse Project encourages developers to write automated tests in the form of unit tests and integration tests. We also welcome ideas for how to improve our automated testing. .. contents:: |toctitle| - :local: + :local: The Health of a Codebase ------------------------ @@ -46,7 +46,7 @@ The main takeaway should be that we care about unit testing enough to measure th Writing Unit Tests with JUnit ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We are aware that there are newer testing tools such as TestNG, but we use `JUnit `_ because it's tried and true. +We are aware that there are newer testing tools such as TestNG, but we use `JUnit `_ because it's tried and true. We support JUnit 5 based testing and require new tests written with it. (Since Dataverse 6.0, we migrated all of our tests formerly based on JUnit 4.) @@ -89,22 +89,35 @@ JUnit 5 Test Helper Extensions Our codebase provides little helpers to ease dealing with state during tests. Some tests might need to change something which should be restored after the test ran. -For unit tests, the most interesting part is to set a JVM setting just for the current test. -Please use the ``@JvmSetting(key = JvmSettings.XXX, value = "")`` annotation on a test method or -a test class to set and clear the property automatically. +For unit tests, the most interesting part is to set a JVM setting just for the current test or a whole test class. +(Which might be an inner class, too!). Please make use of the ``@JvmSetting(key = JvmSettings.XXX, value = "")`` +annotation and also make sure to annotate the test class with ``@LocalJvmSettings``. -To set arbitrary system properties for the current test, a similar extension -``@SystemProperty(key = "", value = "")`` has been added. +Inspired by JUnit's ``@MethodSource`` annotation, you may use ``@JvmSetting(key = JvmSettings.XXX, method = "zzz")`` +to reference a static method located in the same test class by name (i. e. ``private static String zzz() {}``) to allow +retrieving dynamic data instead of String constants only. (Note the requirement for a *static* method!) + +If you want to delete a setting, simply provide a ``null`` value. This can be used to override a class-wide setting +or some other default that is present for some reason. + +To set arbitrary system properties for the current test, a similar extension ``@SystemProperty(key = "", value = "")`` +has been added. (Note: it does not support method references.) Both extensions will ensure the global state of system properties is non-interfering for test executions. Tests using these extensions will be executed in serial. 
+This settings helper may be extended at a later time to manipulate settings in a remote instance during integration +or end-to-end testing. Stay tuned! + Observing Changes to Code Coverage ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Once you've written some tests, you're probably wondering how much you've helped to increase the code coverage. In Netbeans, do a "clean and build." Then, under the "Projects" tab, right-click "dataverse" and click "Code Coverage" -> "Show Report". For each Java file you have open, you should be able to see the percentage of code that is covered by tests and every line in the file should be either green or red. Green indicates that the line is being exercised by a unit test and red indicates that it is not. -In addition to seeing code coverage in Netbeans, you can also see code coverage reports by opening ``target/site/jacoco/index.html`` in your browser. +In addition to seeing code coverage in Netbeans, you can also see code coverage reports by opening ``target/site/jacoco-X-test-coverage-report/index.html`` in your browser. +Depending on the report type you want to look at, let ``X`` be one of ``unit``, ``integration`` or ``merged``. +"Merged" will display combined coverage of both unit and integration tests, but does not currently cover API tests. + Testing Commands ^^^^^^^^^^^^^^^^ @@ -177,42 +190,38 @@ Finally, run the script: $ ./ec2-create-instance.sh -g jenkins.yml -l log_dir -Running the full API test suite using Docker +Running the Full API Test Suite Using Docker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To run the full suite of integration tests on your laptop, running Dataverse and its dependencies in Docker, as explained in the :doc:`/container/dev-usage` section of the Container Guide. - -Alternatively, you can run tests against the app server running on your laptop by following the "getting set up" steps below. +To run the full suite of integration tests on your laptop, we recommend running Dataverse and its dependencies in Docker, as explained in the :doc:`/container/dev-usage` section of the Container Guide. This environment provides additional services (such as S3) that are used in testing. -Getting Set Up to Run REST Assured Tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Running the APIs Without Docker (Classic Dev Env) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Unit tests are run automatically on every build, but dev environments and servers require special setup to run REST Assured tests. In short, the Dataverse Software needs to be placed into an insecure mode that allows arbitrary users and datasets to be created and destroyed. This differs greatly from the out-of-the-box behavior of the Dataverse Software, which we strive to keep secure for sysadmins installing the software for their institutions in a production environment. +While it is possible to run a good number of API tests without using Docker in our :doc:`classic-dev-env`, we are transitioning toward including additional services (such as S3) in our Dockerized development environment (:doc:`/container/dev-usage`), so you will probably find it more convenient to use it instead. -The :doc:`dev-environment` section currently refers developers here for advice on getting set up to run REST Assured tests, but we'd like to add some sort of "dev" flag to the installer to put the Dataverse Software in "insecure" mode, with lots of scary warnings that this dev mode should not be used in production. -The instructions below assume a relatively static dev environment on a Mac. There is a newer "all in one" Docker-based approach documented in the :doc:`/developers/containers` section under "Docker" that you may like to play with as well. +Unit tests are run automatically on every build, but dev environments and servers require special setup to run API (REST Assured) tests. In short, the Dataverse software needs to be placed into an insecure mode that allows arbitrary users and datasets to be created and destroyed (this is done automatically in the Dockerized environment, as well as by the steps described below). This differs greatly from the out-of-the-box behavior of the Dataverse software, which we strive to keep secure for sysadmins installing the software for their institutions in a production environment. The Burrito Key ^^^^^^^^^^^^^^^ -For reasons that have been lost to the mists of time, the Dataverse Software really wants you to to have a burrito. Specifically, if you're trying to run REST Assured tests and see the error "Dataverse config issue: No API key defined for built in user management", you must run the following curl command (or make an equivalent change to your database): +For reasons that have been lost to the mists of time, the Dataverse software really wants you to have a burrito. Specifically, if you're trying to run REST Assured tests and see the error "Dataverse config issue: No API key defined for built in user management", you must run the following curl command (or make an equivalent change to your database): ``curl -X PUT -d 'burrito' http://localhost:8080/api/admin/settings/BuiltinUsers.KEY`` -Without this "burrito" key in place, REST Assured will not be able to create users. We create users to create objects we want to test, such as Dataverse collections, datasets, and files. +Without this "burrito" key in place, REST Assured will not be able to create users. We create users to create objects we want to test, such as collections, datasets, and files. -Root Dataverse Collection Permissions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Root Collection Permissions +^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In your browser, log in as dataverseAdmin (password: admin) and click the "Edit" button for your root Dataverse collection. Navigate to Permissions, then the Edit Access button. Under "Who can add to this Dataverse collection?" choose "Anyone with a Dataverse installation account can add sub Dataverse collections and datasets" if it isn't set to this already. +In your browser, log in as dataverseAdmin (password: admin) and click the "Edit" button for your root collection. Navigate to Permissions, then the Edit Access button. Under "Who can add to this collection?" choose "Anyone with a Dataverse installation account can add sub collections and datasets" if it isn't set to this already. Alternatively, this same step can be done with this script: ``scripts/search/tests/grant-authusers-add-on-root`` -Publish Root Dataverse Collection -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Publish Root Collection +^^^^^^^^^^^^^^^^^^^^^^^ -The root Dataverse collection must be published for some of the REST Assured tests to run. +The root collection must be published for some of the REST Assured tests to run.
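+
+If you prefer the command line, the root collection can also be published with a superuser's API token (a sketch; adjust the host and token for your environment):
+
+.. code-block:: bash
+
+    export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+    curl -H "X-Dataverse-key: $API_TOKEN" -X POST "http://localhost:8080/api/dataverses/root/actions/:publish"
+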
dataverse.siteUrl ^^^^^^^^^^^^^^^^^ @@ -225,6 +234,20 @@ If ``dataverse.siteUrl`` is absent, you can add it with: ``./asadmin create-jvm-options "-Ddataverse.siteUrl=http\://localhost\:8080"`` +dataverse.oai.server.maxidentifiers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The OAI Harvesting tests require that the paging limit for ListIdentifiers must be set to 2, in order to be able to trigger this paging behavior without having to create and export too many datasets: + +``./asadmin create-jvm-options "-Ddataverse.oai.server.maxidentifiers=2"`` + +dataverse.oai.server.maxrecords +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The OAI Harvesting tests require that the paging limit for ListRecords must be set to 2, in order to be able to trigger this paging behavior without having to create and export too many datasets: + +``./asadmin create-jvm-options "-Ddataverse.oai.server.maxrecords=2"`` + Identifier Generation ^^^^^^^^^^^^^^^^^^^^^ @@ -245,17 +268,22 @@ Remember, it’s only a test (and it's not graded)! Some guidelines to bear in m - Map out which logical functions you want to test - Understand what’s being tested and ensure it’s repeatable - Assert the conditions of success / return values for each operation - * A useful resource would be `HTTP status codes `_ + * A useful resource would be `HTTP status codes `_ - Let the code do the labor; automate everything that happens when you run your test file. +- If you need to test an optional service (S3, etc.), add it to our docker compose file. See :doc:`/container/dev-usage`. - Just as with any development, if you’re stuck: ask for help! -To execute existing integration tests on your local Dataverse installation, a helpful command line tool to use is `Maven `_. You should have Maven installed as per the `Development Environment `_ guide, but if not it’s easily done via Homebrew: ``brew install maven``. +To execute existing integration tests on your local Dataverse installation from the command line, use Maven. You should have Maven installed as per :doc:`dev-environment`, but if not it's easily done via Homebrew: ``brew install maven``. Once installed, you may run commands with ``mvn [options] [] []``. -+ If you want to run just one particular API test, it’s as easy as you think: ++ If you want to run just one particular API test class: + + ``mvn test -Dtest=UsersIT`` - ``mvn test -Dtest=FileRecordJobIT`` ++ If you want to run just one particular API test method, + + ``mvn test -Dtest=UsersIT#testMergeAccounts`` + To run more than one test at a time, separate by commas: @@ -284,33 +312,37 @@ To run a test with Testcontainers, you will need to write a JUnit 5 test. Please make sure to: 1. End your test class with ``IT`` -2. Provide a ``@Tag("testcontainers")`` to be picked up during testing. +2. Annotate the test class with two tags: -.. code:: java + .. code:: java - /** A very minimal example for a Testcontainers integration test class. */ - @Testcontainers - @Tag("testcontainers") - class MyExampleIT { /* ... */ } + /** A very minimal example for a Testcontainers integration test class. */ + @Testcontainers(disabledWithoutDocker = true) + @Tag(edu.harvard.iq.dataverse.util.testing.Tags.INTEGRATION_TEST) + @Tag(edu.harvard.iq.dataverse.util.testing.Tags.USES_TESTCONTAINERS) + class MyExampleIT { /* ... */ } -If using upstream Modules, e.g. for PostgreSQL or similar, you will need to add +If using upstream modules, e.g. for PostgreSQL or similar, you will need to add a dependency to ``pom.xml`` if not present. `See the PostgreSQL module example. 
`_ To run these tests, simply call out to Maven: .. code:: - mvn -P tc verify + mvn verify + +Notes: -.. note:: +1. Remember to have Docker ready to serve or tests will fail. +2. You can skip running unit tests by adding ``-DskipUnitTests`` +3. You can choose to ignore test with Testcontainers by adding ``-Dit.groups='integration & !testcontainers'`` + Learn more about `filter expressions in the JUnit 5 guide `_. - 1. Remember to have Docker ready to serve or tests will fail. - 2. This will not run any unit tests or API tests. -Measuring Coverage of Integration Tests ---------------------------------------- +Measuring Coverage of API Tests +------------------------------- -Measuring the code coverage of integration tests with Jacoco requires several steps. In order to make these steps clear we'll use "/usr/local/payara6" as the Payara directory and "dataverse" as the Payara Unix user. +Measuring the code coverage of API tests with Jacoco requires several steps. In order to make these steps clear we'll use "/usr/local/payara6" as the Payara directory and "dataverse" as the Payara Unix user. Please note that this was tested under Glassfish 4 but it is hoped that the same steps will work with Payara. @@ -360,8 +392,8 @@ Run this as the "dataverse" user. Note that after deployment the file "/usr/local/payara6/glassfish/domains/domain1/config/jacoco.exec" exists and is empty. -Run Integration Tests -~~~~~~~~~~~~~~~~~~~~~ +Run API Tests +~~~~~~~~~~~~~ Note that even though you see "docker-aio" in the command below, we assume you are not necessarily running the test suite within Docker. (Some day we'll probably move this script to another directory.) For this reason, we pass the URL with the normal port (8080) that app servers run on to the ``run-test-suite.sh`` script. @@ -395,6 +427,10 @@ target/coverage-it/index.html is the place to start reading the code coverage re Load/Performance Testing ------------------------ +See also :doc:`/qa/performance-tests` in the QA Guide. + +.. _locust: + Locust ~~~~~~ @@ -494,7 +530,7 @@ Future Work on Integration Tests - Automate testing of dataverse-client-python: https://github.com/IQSS/dataverse-client-python/issues/10 - Work with @leeper on testing the R client: https://github.com/IQSS/dataverse-client-r - Review and attempt to implement "API Test Checklist" from @kcondon at https://docs.google.com/document/d/199Oq1YwQ4pYCguaeW48bIN28QAitSk63NbPYxJHCCAE/edit?usp=sharing -- Generate code coverage reports for **integration** tests: https://github.com/pkainulainen/maven-examples/issues/3 and http://www.petrikainulainen.net/programming/maven/creating-code-coverage-reports-for-unit-and-integration-tests-with-the-jacoco-maven-plugin/ +- Generate code coverage reports for **integration** tests: https://github.com/pkainulainen/maven-examples/issues/3 and https://www.petrikainulainen.net/programming/maven/creating-code-coverage-reports-for-unit-and-integration-tests-with-the-jacoco-maven-plugin/ - Consistent logging of API Tests. Show test name at the beginning and end and status codes returned. 
- expected passing and known/expected failing integration tests: https://github.com/IQSS/dataverse/issues/4438 @@ -519,7 +555,3 @@ Future Work on Accessibility Testing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Using https://github.com/GlobalDataverseCommunityConsortium/dataverse-ansible and hooks available from accessibility testing tools, automate the running of accessibility tools on PRs so that developers will receive quicker feedback on proposed code changes that reduce the accessibility of the application. - ----- - -Previous: :doc:`sql-upgrade-scripts` | Next: :doc:`documentation` diff --git a/doc/sphinx-guides/source/developers/tips.rst b/doc/sphinx-guides/source/developers/tips.rst index e1ee40cafa5..839ae3aa19d 100755 --- a/doc/sphinx-guides/source/developers/tips.rst +++ b/doc/sphinx-guides/source/developers/tips.rst @@ -2,7 +2,7 @@ Tips ==== -If you just followed the steps in :doc:`dev-environment` for the first time, you will need to get set up to deploy code to your app server. Below you'll find other tips as well. +If you just followed the steps in :doc:`classic-dev-env` for the first time, you will need to get set up to deploy code to your app server. Below you'll find other tips as well. .. contents:: |toctitle| :local: @@ -10,7 +10,7 @@ If you just followed the steps in :doc:`dev-environment` for the first time, you Iterating on Code and Redeploying --------------------------------- -When you followed the steps in the :doc:`dev-environment` section, the war file was deployed to Payara by the Dataverse Software installation script. That's fine but once you're ready to make a change to the code you will need to get comfortable with undeploying and redeploying code (a war file) to Payara. +When you followed the steps in the :doc:`classic-dev-env` section, the war file was deployed to Payara by the Dataverse Software installation script. That's fine but once you're ready to make a change to the code you will need to get comfortable with undeploying and redeploying code (a war file) to Payara. It's certainly possible to manage deployment and undeployment of the war file via the command line using the ``asadmin`` command that ships with Payara (that's what the Dataverse Software installation script uses and the steps are documented below), but we recommend getting set up with an IDE such as Netbeans to manage deployment for you. @@ -99,7 +99,7 @@ With over 100 tables, the Dataverse Software PostgreSQL database ("dvndb") can b pgAdmin ~~~~~~~~ -Back in the :doc:`dev-environment` section, we had you install pgAdmin, which can help you explore the tables and execute SQL commands. It's also listed in the :doc:`tools` section. +Back in the :doc:`classic-dev-env` section, we had you install pgAdmin, which can help you explore the tables and execute SQL commands. It's also listed in the :doc:`tools` section. SchemaSpy ~~~~~~~~~ @@ -238,6 +238,4 @@ with the following code in ``SettingsWrapper.java``: A more serious example would be direct calls to PermissionServiceBean methods used in render logic expressions. This is something that has happened and caused some problems in real life. A simple permission service lookup (for example, whether a user is authorized to create a dataset in the current dataverse) can easily take 15 database queries. Repeated multiple times, this can quickly become a measurable delay in rendering the page. PermissionsWrapper must be used exclusively for any such lookups from JSF pages. 
----- - -Previous: :doc:`dev-environment` | Next: :doc:`troubleshooting` +See also :doc:`performance`. diff --git a/doc/sphinx-guides/source/developers/tools.rst b/doc/sphinx-guides/source/developers/tools.rst index a21becd14cf..9b3e38232e8 100755 --- a/doc/sphinx-guides/source/developers/tools.rst +++ b/doc/sphinx-guides/source/developers/tools.rst @@ -2,11 +2,16 @@ Tools ===== -These are handy tools for your :doc:`/developers/dev-environment/`. +These are handy tools for your :doc:`dev-environment`. .. contents:: |toctitle| :local: +Tools for Faster Deployment ++++++++++++++++++++++++++++ + +See :ref:`ide-trigger-code-deploy` in the Container Guide. + Netbeans Connector Chrome Extension +++++++++++++++++++++++++++++++++++ @@ -18,7 +23,7 @@ Unfortunately, while the Netbeans Connector Chrome Extension used to "just work" pgAdmin +++++++ -You probably installed pgAdmin when following the steps in the :doc:`dev-environment` section but if not, you can download it from https://www.pgadmin.org +You may have installed pgAdmin when following the steps in the :doc:`classic-dev-env` section but if not, you can download it from https://www.pgadmin.org Maven +++++ @@ -28,20 +33,20 @@ With Maven installed you can run ``mvn package`` and ``mvn test`` from the comma PlantUML ++++++++ -PlantUML is used to create diagrams in the guides and other places. Download it from http://plantuml.com and check out an example script at https://github.com/IQSS/dataverse/blob/v4.6.1/doc/Architecture/components.sh . Note that for this script to work, you'll need the ``dot`` program, which can be installed on Mac with ``brew install graphviz``. +PlantUML is used to create diagrams in the guides and other places. Download it from https://plantuml.com and check out an example script at https://github.com/IQSS/dataverse/blob/v4.6.1/doc/Architecture/components.sh . Note that for this script to work, you'll need the ``dot`` program, which can be installed on Mac with ``brew install graphviz``. Eclipse Memory Analyzer Tool (MAT) ++++++++++++++++++++++++++++++++++ The Memory Analyzer Tool (MAT) from Eclipse can help you analyze heap dumps, showing you "leak suspects" such as seen at https://github.com/payara/Payara/issues/350#issuecomment-115262625 -It can be downloaded from http://www.eclipse.org/mat +It can be downloaded from https://www.eclipse.org/mat If the heap dump provided to you was created with ``gcore`` (such as with ``gcore -o /tmp/app.core $app_pid``) rather than ``jmap``, you will need to convert the file before you can open it in MAT. Using ``app.core.13849`` as example of the original 33 GB file, here is how you could convert it into a 26 GB ``app.core.13849.hprof`` file. Please note that this operation took almost 90 minutes: ``/usr/java7/bin/jmap -dump:format=b,file=app.core.13849.hprof /usr/java7/bin/java app.core.13849`` -A file of this size may not "just work" in MAT. When you attempt to open it you may see something like "An internal error occurred during: "Parsing heap dump from '/tmp/heapdumps/app.core.13849.hprof'". Java heap space". If so, you will need to increase the memory allocated to MAT. On Mac OS X, this can be done by editing ``MemoryAnalyzer.app/Contents/MacOS/MemoryAnalyzer.ini`` and increasing the value "-Xmx1024m" until it's high enough to open the file. See also http://wiki.eclipse.org/index.php/MemoryAnalyzer/FAQ#Out_of_Memory_Error_while_Running_the_Memory_Analyzer +A file of this size may not "just work" in MAT. 
When you attempt to open it you may see something like "An internal error occurred during: "Parsing heap dump from '/tmp/heapdumps/app.core.13849.hprof'". Java heap space". If so, you will need to increase the memory allocated to MAT. On Mac OS X, this can be done by editing ``MemoryAnalyzer.app/Contents/MacOS/MemoryAnalyzer.ini`` and increasing the value "-Xmx1024m" until it's high enough to open the file. See also https://wiki.eclipse.org/index.php/MemoryAnalyzer/FAQ#Out_of_Memory_Error_while_Running_the_Memory_Analyzer PageKite ++++++++ @@ -58,7 +63,7 @@ The first time you run ``./pagekite.py`` a file at ``~/.pagekite.rc`` will be created. You can edit this file to configure PageKite to serve up port 8080 (the default app server HTTP port) or the port of your choosing. -According to https://pagekite.net/support/free-for-foss/ PageKite (very generously!) offers free accounts to developers writing software the meets http://opensource.org/docs/definition.php such as the Dataverse Project. +According to https://pagekite.net/support/free-for-foss/ PageKite (very generously!) offers free accounts to developers writing software the meets https://opensource.org/docs/definition.php such as the Dataverse Project. MSV +++ @@ -96,7 +101,7 @@ Download SonarQube from https://www.sonarqube.org and start look in the `bin` di -Dsonar.test.exclusions='src/test/**,src/main/webapp/resources/**' \ -Dsonar.issuesReport.html.enable=true \ -Dsonar.issuesReport.html.location='sonar-issues-report.html' \ - -Dsonar.jacoco.reportPath=target/jacoco.exec + -Dsonar.jacoco.reportPath=target/coverage-reports/jacoco-unit.exec Once the analysis is complete, you should be able to access http://localhost:9000/dashboard?id=edu.harvard.iq%3Adataverse to see the report. To learn about resource leaks, for example, click on "Bugs", the "Tag", then "leak" or "Rule", then "Resources should be closed". @@ -261,10 +266,3 @@ We can see that the first ``FGC`` resulted in reducing the ``"O"`` by almost 7GB etc. ... It is clearly growing - so now we can conclude that indeed something there is using memory in a way that's not recoverable, and this is a clear problem. - - - - ----- - -Previous: :doc:`making-releases` | Next: :doc:`unf/index` diff --git a/doc/sphinx-guides/source/developers/troubleshooting.rst b/doc/sphinx-guides/source/developers/troubleshooting.rst index 832785f9860..2c437ca8b2e 100755 --- a/doc/sphinx-guides/source/developers/troubleshooting.rst +++ b/doc/sphinx-guides/source/developers/troubleshooting.rst @@ -2,7 +2,7 @@ Troubleshooting =============== -Over in the :doc:`dev-environment` section we described the "happy path" of when everything goes right as you set up your Dataverse Software development environment. Here are some common problems and solutions for when things go wrong. +Over in the :doc:`classic-dev-env` section we described the "happy path" of when everything goes right as you set up your Dataverse Software development environment. Here are some common problems and solutions for when things go wrong. .. 
contents:: |toctitle| :local: @@ -110,7 +110,3 @@ If you are seeing ``Response code: 400, [url] domain of URL is not allowed`` it' ``./asadmin delete-jvm-options '-Ddataverse.siteUrl=http\://localhost\:8080'`` ``./asadmin create-jvm-options '-Ddataverse.siteUrl=http\://demo.dataverse.org'`` - ----- - -Previous: :doc:`tips` | Next: :doc:`version-control` diff --git a/doc/sphinx-guides/source/developers/unf/index.rst b/doc/sphinx-guides/source/developers/unf/index.rst index 2423877348f..596bb0cf3bf 100644 --- a/doc/sphinx-guides/source/developers/unf/index.rst +++ b/doc/sphinx-guides/source/developers/unf/index.rst @@ -27,7 +27,7 @@ with Dataverse Software 2.0 and throughout the 3.* lifecycle, UNF v.5 UNF v.6. Two parallel implementation, in R and Java, will be available, for cross-validation. -Learn more: Micah Altman and Gary King. 2007. “A Proposed Standard for the Scholarly Citation of Quantitative Data.†D-Lib Magazine, 13. Publisher’s Version Copy at http://j.mp/2ovSzoT +Learn more: Micah Altman and Gary King. 2007. “A Proposed Standard for the Scholarly Citation of Quantitative Data.†D-Lib Magazine, 13. Publisher’s Version Copy at https://j.mp/2ovSzoT **Contents:** @@ -37,7 +37,3 @@ Learn more: Micah Altman and Gary King. 2007. “A Proposed Standard for the Sch unf-v3 unf-v5 unf-v6 - ----- - -Previous: :doc:`/developers/tools` | Next: :doc:`/developers/remote-users` diff --git a/doc/sphinx-guides/source/developers/unf/unf-v3.rst b/doc/sphinx-guides/source/developers/unf/unf-v3.rst index 3f0018d7fa5..98c07b398e0 100644 --- a/doc/sphinx-guides/source/developers/unf/unf-v3.rst +++ b/doc/sphinx-guides/source/developers/unf/unf-v3.rst @@ -34,11 +34,11 @@ For example, the number pi at five digits is represented as -3.1415e+, and the n 1. Terminate character strings representing nonmissing values with a POSIX end-of-line character. -2. Encode each character string with `Unicode bit encoding `_. Versions 3 through 4 use UTF-32BE; Version 4.1 uses UTF-8. +2. Encode each character string with `Unicode bit encoding `_. Versions 3 through 4 use UTF-32BE; Version 4.1 uses UTF-8. 3. Combine the vector of character strings into a single sequence, with each character string separated by a POSIX end-of-line character and a null byte. -4. Compute a hash on the resulting sequence using the standard MD5 hashing algorithm for Version 3 and using `SHA256 `_ for Version 4. The resulting hash is `base64 `_ encoded to support readability. +4. Compute a hash on the resulting sequence using the standard MD5 hashing algorithm for Version 3 and using `SHA256 `_ for Version 4. The resulting hash is `base64 `_ encoded to support readability. 5. Calculate the UNF for each lower-level data object, using a consistent UNF version and level of precision across the individual UNFs being combined. @@ -49,4 +49,4 @@ For example, the number pi at five digits is represented as -3.1415e+, and the n 8. Combine UNFs from multiple variables to form a single UNF for an entire data frame, and then combine UNFs for a set of data frames to form a single UNF that represents an entire research study. Learn more: -Software for computing UNFs is available in an R Module, which includes a Windows standalone tool and code for Stata and SAS languages. Also see the following for more details: Micah Altman and Gary King. 2007. "A Proposed Standard for the Scholarly Citation of Quantitative Data," D-Lib Magazine, Vol. 13, No. 3/4 (March). 
(Abstract: `HTML `_ | Article: `PDF `_) +Software for computing UNFs is available in an R Module, which includes a Windows standalone tool and code for Stata and SAS languages. Also see the following for more details: Micah Altman and Gary King. 2007. "A Proposed Standard for the Scholarly Citation of Quantitative Data," D-Lib Magazine, Vol. 13, No. 3/4 (March). (Abstract: `HTML `_ | Article: `PDF `_) diff --git a/doc/sphinx-guides/source/developers/unf/unf-v6.rst b/doc/sphinx-guides/source/developers/unf/unf-v6.rst index 9648bae47c8..b2495ff3dd9 100644 --- a/doc/sphinx-guides/source/developers/unf/unf-v6.rst +++ b/doc/sphinx-guides/source/developers/unf/unf-v6.rst @@ -156,7 +156,7 @@ For example, to specify a non-default precision the parameter it is specified us | Allowed values are {``128`` , ``192`` , ``196`` , ``256``} with ``128`` being the default. | ``R1`` - **truncate** numeric values to ``N`` digits, **instead of rounding**, as previously described. -`Dr. Micah Altman's classic UNF v5 paper `_ mentions another optional parameter ``T###``, for specifying rounding of date and time values (implemented as stripping the values of entire components - fractional seconds, seconds, minutes, hours... etc., progressively) - but it doesn't specify its syntax. It is left as an exercise for a curious reader to contact the author and work out the details, if so desired. (Not implemented in UNF Version 6 by the Dataverse Project). +`Dr. Micah Altman's classic UNF v5 paper `_ mentions another optional parameter ``T###``, for specifying rounding of date and time values (implemented as stripping the values of entire components - fractional seconds, seconds, minutes, hours... etc., progressively) - but it doesn't specify its syntax. It is left as an exercise for a curious reader to contact the author and work out the details, if so desired. (Not implemented in UNF Version 6 by the Dataverse Project). Note: we do not recommend truncating character strings at fewer bytes than the default ``128`` (the ``X`` parameter). At the very least this number **must** be high enough so that the printable UNFs of individual variables or files are not truncated, when calculating combined UNFs of files or datasets, respectively. diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index aacc245af5a..c5669d02e77 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -24,7 +24,7 @@ The goals of the Dataverse Software branching strategy are: - allow for concurrent development - only ship stable code -We follow a simplified "git flow" model described at http://nvie.com/posts/a-successful-git-branching-model/ involving a "master" branch, a "develop" branch, and feature branches such as "1234-bug-fix". +We follow a simplified "git flow" model described at https://nvie.com/posts/a-successful-git-branching-model/ involving a "master" branch, a "develop" branch, and feature branches such as "1234-bug-fix". Branches ~~~~~~~~ @@ -34,6 +34,8 @@ The "master" Branch The "`master `_" branch represents released versions of the Dataverse Software. As mentioned in the :doc:`making-releases` section, at release time we update the master branch to include all the code for that release. Commits are never made directly to master. Rather, master is updated only when we merge code into it from the "develop" branch. +.. 
_develop-branch: + The "develop" Branch ******************** @@ -65,21 +67,65 @@ The example of creating a pull request below has to do with fixing an important Find or Create a GitHub Issue ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For guidance on which issue to work on, please ask! Also, see https://github.com/IQSS/dataverse/blob/develop/CONTRIBUTING.md +An issue represents a bug (unexpected behavior) or a new feature in Dataverse. We'll use the issue number in the branch we create for our pull request. + +Finding GitHub Issues to Work On +******************************** + +Assuming this is your first contribution to Dataverse, you should start with something small. The following issue labels might be helpful in your search: + +- `good first issue `_ (these appear at https://github.com/IQSS/dataverse/contribute ) +- `hacktoberfest `_ +- `Help Wanted: Code `_ +- `Help Wanted: Documentation `_ + +For guidance on which issue to work on, please ask! :ref:`getting-help-developers` explains how to get in touch. + +Creating GitHub Issues to Work On +********************************* + +You are very welcome to create a GitHub issue to work on. However, for significant changes, please reach out (see :ref:`getting-help-developers`) to make sure the team and community agree with the proposed change. + +For small changes and especially typo fixes, please don't worry about reaching out first. + +Communicate Which Issue You Are Working On +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Let's say you want to tackle https://github.com/IQSS/dataverse/issues/3728 which points out a typo in a page of the Dataverse Software's documentation. +In the issue you can simply leave a comment to say you're working on it. If you tell us your GitHub username we are happy to add you to the "read only" team at https://github.com/orgs/IQSS/teams/dataverse-readonly/members so that we can assign the issue to you while you're working on it. You can also tell us if you'd like to be added to the `Dataverse Community Contributors spreadsheet `_. -Create a New Branch off the develop Branch +Create a New Branch Off the develop Branch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Always create your feature branch from the latest code in develop, pulling the latest code if necessary. As mentioned above, your branch should have a name like "3728-doc-apipolicy-fix" that starts with the issue number you are addressing, and ends with a short, descriptive name. Dashes ("-") and underscores ("_") in your branch name are ok, but please try to avoid other special characters such as ampersands ("&") that have special meaning in Unix shells. +Always create your feature branch from the latest code in develop, pulling the latest code if necessary. As mentioned above, your branch should have a name like "3728-doc-apipolicy-fix" that starts with the issue number you are addressing (e.g. `#3728 `_) and ends with a short, descriptive name. Dashes ("-") and underscores ("_") in your branch name are ok, but please try to avoid other special characters such as ampersands ("&") that have special meaning in Unix shells. Commit Your Change to Your New Branch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Making a commit (or several commits) to that branch. Ideally the first line of your commit message includes the number of the issue you are addressing, such as ``Fixed BlockedApiPolicy #3728``. +For each commit to that branch, try to include the issue number along with a summary in the first line of the commit message, such as ``Fixed BlockedApiPolicy #3728``. 
You are welcome to write longer descriptions in the body as well! + +.. _writing-release-note-snippets: + +Writing a Release Note Snippet +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We highly value your insight as a contributor when in comes to describing your work in our release notes. Not every pull request will be mentioned in release notes but most are. + +As described at :ref:`write-release-notes`, at release time we compile together release note "snippets" into the final release notes. + +Here's how to add a release note snippet to your pull request: + +- Create a Markdown file under ``doc/release-notes``. You can reuse the name of your branch and append ".md" to it, e.g. ``3728-doc-apipolicy-fix.md`` +- Edit the snippet to include anything you think should be mentioned in the release notes, such as: + + - Descriptions of new features + - Explanations of bugs fixed + - New configuration settings + - Upgrade instructions + - Etc. + +Release note snippets do not need to be long. For a new feature, a single line description might be enough. Please note that your release note will likely be edited (expanded or shortened) when the final release notes are being created. Push Your Branch to GitHub ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -89,14 +135,16 @@ Push your feature branch to your fork of the Dataverse Software. Your git comman Make a Pull Request ~~~~~~~~~~~~~~~~~~~ -Make a pull request to get approval to merge your changes into the develop branch. Note that once a pull request is created, we'll remove the corresponding issue from our kanban board so that we're only tracking one card. +Make a pull request to get approval to merge your changes into the develop branch. +If the pull request notes indicate that release notes are necessary, the workflow can then verify the existence of a corresponding file and respond with a 'thank you!' message. On the other hand, if no release notes are detected, the contributor can be gently reminded of their absence. Please see :doc:`making-releases` for guidance on writing release notes. +Note that once a pull request is created, we'll remove the corresponding issue from our kanban board so that we're only tracking one card. Feedback on the pull request template we use is welcome! Here's an example of a pull request for issue #3827: https://github.com/IQSS/dataverse/pull/3827 Make Sure Your Pull Request Has Been Advanced to Code Review ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Now that you've made your pull request, your goal is to make sure it appears in the "Code Review" column at https://github.com/orgs/IQSS/projects/2. +Now that you've made your pull request, your goal is to make sure it appears in the "Code Review" column at https://github.com/orgs/IQSS/projects/34. Look at https://github.com/IQSS/dataverse/blob/master/CONTRIBUTING.md for various ways to reach out to developers who have enough access to the GitHub repo to move your issue and pull request to the "Code Review" column. @@ -238,7 +286,3 @@ GitHub documents how to make changes to a fork at https://help.github.com/articl vim path/to/file.txt git commit git push OdumInstitute 4709-postgresql_96 - ----- - -Previous: :doc:`troubleshooting` | Next: :doc:`sql-upgrade-scripts` diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index f6eda53d718..3184160b387 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -20,6 +20,7 @@ These documentation guides are for the |version| version of Dataverse. 
To find g developers/index container/index style/index + qa/index.md How the Guides Are Organized ---------------------------- @@ -45,7 +46,7 @@ Other Resources Additional information about the Dataverse Project itself including presentations, information about upcoming releases, data management and citation, and announcements can be found at -`http://dataverse.org/ `__ +`https://dataverse.org/ `__ **User Group** @@ -68,7 +69,7 @@ The support email address is `support@dataverse.org `__ -or use `GitHub pull requests `__, +or use `GitHub pull requests `__, if you have some code, scripts or documentation that you'd like to share. If you have a **security issue** to report, please email `security@dataverse.org `__. See also :ref:`reporting-security-issues`. diff --git a/doc/sphinx-guides/source/installation/advanced.rst index 87f2a4fd0ab..3de5d0ea07c 100644 --- a/doc/sphinx-guides/source/installation/advanced.rst +++ b/doc/sphinx-guides/source/installation/advanced.rst @@ -7,6 +7,8 @@ Advanced installations are not officially supported but here we are at least doc .. contents:: |toctitle| :local: +.. _multiple-app-servers: + Multiple App Servers -------------------- diff --git a/doc/sphinx-guides/source/installation/config.rst index f9fe74afc7c..2baa2827250 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1,4 +1,3 @@ -============= Configuration ============= @@ -143,7 +142,7 @@ The need to redirect port HTTP (port 80) to HTTPS (port 443) for security has al Your decision to proxy or not should primarily be driven by which features of the Dataverse Software you'd like to use. If you'd like to use Shibboleth, the decision is easy because proxying or "fronting" Payara with Apache is required. The details are covered in the :doc:`shibboleth` section. -Even if you have no interest in Shibboleth, you may want to front your Dataverse installation with Apache or nginx to simply the process of installing SSL certificates. There are many tutorials on the Internet for adding certs to Apache, including a some `notes used by the Dataverse Project team `_, but the process of adding a certificate to Payara is arduous and not for the faint of heart. The Dataverse Project team cannot provide much help with adding certificates to Payara beyond linking to `tips `_ on the web. +Even if you have no interest in Shibboleth, you may want to front your Dataverse installation with Apache or nginx to simplify the process of installing SSL certificates. There are many tutorials on the Internet for adding certs to Apache, including some `notes used by the Dataverse Project team `_, but the process of adding a certificate to Payara is arduous and not for the faint of heart. The Dataverse Project team cannot provide much help with adding certificates to Payara beyond linking to `tips `_ on the web. Still not convinced you should put Payara behind another web server? Even if you manage to get your SSL certificate into Payara, how are you going to run Payara on low ports such as 80 and 443? Are you going to run Payara as root? Bad idea. This is a security risk. Under "Additional Recommendations" under "Securing Your Installation" above you are advised to configure Payara to run as a user other than root.
@@ -155,7 +154,7 @@ If you really don't want to front Payara with any proxy (not recommended), you c ``./asadmin set server-config.network-config.network-listeners.network-listener.http-listener-2.port=443`` -What about port 80? Even if you don't front your Dataverse installation with Apache, you may want to let Apache run on port 80 just to rewrite HTTP to HTTPS as described above. You can use a similar command as above to change the HTTP port that Payara uses from 8080 to 80 (substitute ``http-listener-1.port=80``). Payara can be used to enforce HTTPS on its own without Apache, but configuring this is an exercise for the reader. Answers here may be helpful: http://stackoverflow.com/questions/25122025/glassfish-v4-java-7-port-unification-error-not-able-to-redirect-http-to +What about port 80? Even if you don't front your Dataverse installation with Apache, you may want to let Apache run on port 80 just to rewrite HTTP to HTTPS as described above. You can use a similar command as above to change the HTTP port that Payara uses from 8080 to 80 (substitute ``http-listener-1.port=80``). Payara can be used to enforce HTTPS on its own without Apache, but configuring this is an exercise for the reader. Answers here may be helpful: https://stackoverflow.com/questions/25122025/glassfish-v4-java-7-port-unification-error-not-able-to-redirect-http-to If you are running an installation with Apache and Payara on the same server, and would like to restrict Payara from responding to any requests to port 8080 from external hosts (in other words, not through Apache), you can restrict the AJP listener to localhost only with: @@ -179,56 +178,383 @@ Persistent Identifiers and Publishing Datasets Persistent identifiers (PIDs) are a required and integral part of the Dataverse Software. They provide a URL that is guaranteed to resolve to the datasets or files they represent. The Dataverse Software currently supports creating -identifiers using one of several PID providers. The most appropriate PIDs for public data are DOIs (provided by +identifiers using any of several PID types. The most appropriate PIDs for public data are DOIs (e.g., provided by DataCite or EZID) and Handles. Dataverse also supports PermaLinks which could be useful for intranet or catalog use cases. A DOI provider called "FAKE" is recommended only for testing and development purposes. +Dataverse can be configured with one or more PID providers, each of which can mint and manage PIDs with a given protocol +(e.g., doi, handle, permalink) using a specific service provider/account (e.g. with DataCite, EZId, or HandleNet) +to manage an authority/shoulder combination, aka a "prefix" (PermaLinks also support custom separator characters as part of the prefix), +along with an optional list of individual PIDs (with different authority/shoulders) than can be managed with that account. + Testing PID Providers +++++++++++++++++++++ -By default, the installer configures the DataCite test service as the registration provider. DataCite requires that you -register for a test account, configured with your own prefix (please contact support@datacite.org). +By default, the installer configures the Fake DOI provider as the registration provider. Unlike other DOI Providers, the Fake Provider does not involve any +external resolution service and is not appropriate for use beyond development and testing. 
You may wish instead to test with +PermaLinks or with a DataCite test account (which uses DataCite's test infrastructure and will help ensure that your Dataverse instance can make network connections to DataCite). +DataCite requires that you register for a test account, which will have a username, password, and your own prefix (please contact support@datacite.org for a test account. +You may wish to `contact the GDCC `_ instead - GDCC is able to provide DataCite accounts with a group discount and can also provide test accounts.). Once you receive the login name, password, and prefix for the account, -configure the credentials via :ref:`dataverse.pid.datacite.username` and -:ref:`dataverse.pid.datacite.password`, then restart Payara. - -Configure the prefix via the API (where it is referred to as :ref:`:Authority`): +configure the credentials as described below. -``curl -X PUT -d 10.xxxx http://localhost:8080/api/admin/settings/:Authority`` +Alternately, you may wish to configure other providers for testing: -.. TIP:: - This testing section is oriented around DataCite but other PID Providers can be tested as well. - - EZID is available to University of California scholars and researchers. Testing can be done using the authority 10.5072 and shoulder FK2 with the "apitest" account (contact EZID for credentials) or an institutional account. Configuration in Dataverse is then analogous to using DataCite. - - The PermaLink and FAKE DOI providers do not involve an external account. See :ref:`permalinks` and (for the FAKE DOI provider) the :doc:`/developers/dev-environment` section of the Developer Guide. + - The PermaLink provider, like the FAKE DOI provider, does not involve an external account. + Unlike the Fake DOI provider, the PermaLink provider creates PIDs that begin with "perma:", making it clearer that they are not DOIs, + and that do resolve to the local dataset/file page in Dataverse, making them useful for some production use cases. See :ref:`permalinks` and (for the FAKE DOI provider) the :doc:`/developers/dev-environment` section of the Developer Guide. -Once all is configured, you will be able to publish datasets and files, but **the persistent identifiers will not be citable**, -and they will only resolve from the DataCite test environment (and then only if the Dataverse installation from which -you published them is accessible - DOIs minted from your laptop will not resolve). Note that any datasets or files -created using the test configuration cannot be directly migrated and would need to be created again once a valid DOI -namespace is configured. +Provider-specific configuration is described below. -One you are done testing, to properly configure persistent identifiers for a production installation, an account and associated namespace must be -acquired for a fee from a DOI or HDL provider. **DataCite** (https://www.datacite.org) is the recommended DOI provider +Once all is configured, you will be able to publish datasets and files, but **the persistent identifiers will not be citable** +as they, with the exception of PermaLinks, will not redirect to your dataset page in Dataverse. + +Note that any datasets or files created using a test configuration cannot be directly migrated to a production PID provider +and would need to be created again once a valid PID provider is configured. + +Once you are done testing, to properly configure persistent identifiers for a production installation, an account and associated namespace (e.g.
authority/shoulder) must be +acquired for a fee from a DOI or HDL provider. (As noted above, PermaLinks may be appropriate for intranet and catalog use cases.) +**DataCite** (https://www.datacite.org) is the recommended DOI provider (see https://dataversecommunity.global for more on joining DataCite through the Global Dataverse Community Consortium) but **EZID** (http://ezid.cdlib.org) is an option for the University of California according to https://www.cdlib.org/cdlinfo/2017/08/04/ezid-doi-service-is-evolving/ . **Handle.Net** (https://www.handle.net) is the HDL provider. -Once you have your DOI or Handle account credentials and a namespace, configure your Dataverse installation -using the JVM options and database settings below. +Once you have your DOI or Handle account credentials and a prefix, configure your Dataverse installation +using the settings below. + + +Configuring PID Providers ++++++++++++++++++++++++++ + +There are two required global settings to configure PID providers - the list of ids of providers and which one of those should be the default. +Per-provider settings are also required - some that are common to all types and some that are type-specific. All of these settings are defined +to be compatible with the MicroProfile specification, which means that: + +1. Any of these settings can be set via system properties (see :ref:`jvm-options` for how to do this), environment variables, or other + MicroProfile Config mechanisms supported by the app server. + `See Payara docs for supported sources `_. +2. Remember to protect your secrets. For passwords, use an environment variable (bare minimum), a password alias named the same + as the key (OK) or use the `"dir config source" of Payara `_ (best). + + Alias creation example: + + .. code-block:: shell + + echo "AS_ADMIN_ALIASPASSWORD=changeme" > /tmp/p.txt + asadmin create-password-alias --passwordfile /tmp/p.txt dataverse.pid.datacite1.datacite.password + rm /tmp/p.txt + +3. Environment variable names are derived from the key by replacing any dot, colon, dash, etc. with an underscore "_" and using all uppercase + letters. Example: ``dataverse.pid.default-provider`` -> ``DATAVERSE_PID_DEFAULT_PROVIDER`` + +Global Settings +^^^^^^^^^^^^^^^ + +The following global settings are used to configure PID Providers in the Dataverse software: + +.. _dataverse.pid.providers: + +dataverse.pid.providers +^^^^^^^^^^^^^^^^^^^^^^^ + +A comma-separated list of the ids of the PID providers to use. IDs should be simple unique text strings, e.g. datacite1, perma1, etc. +IDs are used to scope the provider-specific settings but are not directly visible to users. + +.. _dataverse.pid.default-provider: + +dataverse.pid.default-provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ID of the default PID provider to use. + +.. _dataverse.spi.pidproviders.directory: + +dataverse.spi.pidproviders.directory +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The path to the directory where JAR files containing additional types of PID Providers can be added. +Dataverse includes providers that support DOIs (DataCite, EZId, or FAKE), Handles, and PermaLinks. +PID provider jar files added to this directory can replace any of these or add new PID Providers. + +Per-Provider Settings +^^^^^^^^^^^^^^^^^^^^^ + +Each Provider listed by id in the dataverse.pid.providers setting must be configured with the following common settings and any settings that are specific to the provider type.
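+
+As a rough illustration of how these settings fit together, the sketch below configures a single DataCite provider via environment variables, following the naming convention described above (the same keys can equally be set as JVM options). The provider id ``datacite1``, the test authority ``10.5072``, the shoulder ``FK2``, and the username ``test.account`` are placeholder values; substitute the values for your own account, and supply the password via a password alias or another protected MicroProfile Config source rather than a plain environment variable where possible. The individual settings are described in the subsections that follow.
+
+.. code-block:: shell
+
+    # Placeholder values for a single DataCite test provider; adjust for your account.
+    # These variables must be visible to the app server process (e.g. set in its start-up environment).
+    export DATAVERSE_PID_PROVIDERS=datacite1
+    export DATAVERSE_PID_DEFAULT_PROVIDER=datacite1
+    export DATAVERSE_PID_DATACITE1_TYPE=datacite
+    export DATAVERSE_PID_DATACITE1_LABEL=DataCiteTest
+    export DATAVERSE_PID_DATACITE1_AUTHORITY=10.5072
+    export DATAVERSE_PID_DATACITE1_SHOULDER=FK2
+    export DATAVERSE_PID_DATACITE1_DATACITE_MDS_API_URL=https://mds.test.datacite.org
+    export DATAVERSE_PID_DATACITE1_DATACITE_REST_API_URL=https://api.test.datacite.org
+    export DATAVERSE_PID_DATACITE1_DATACITE_USERNAME=test.account
+    # For the password, prefer the password alias shown above (dataverse.pid.datacite1.datacite.password).
+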
.. _dataverse.pid.*.type: + +dataverse.pid.*.type +^^^^^^^^^^^^^^^^^^^^ + +The Provider type, currently one of ``datacite``, ``ezid``, ``FAKE``, ``hdl``, or ``perma``. The type defines which protocol a service supports (DOI, Handle, or PermaLink) and, for DOI Providers, which +DOI service is used. + +.. _dataverse.pid.*.label: + +dataverse.pid.*.label +^^^^^^^^^^^^^^^^^^^^^ + +A human-readable label for the provider. + +.. _dataverse.pid.*.authority: + +dataverse.pid.*.authority +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. _dataverse.pid.*.shoulder: + +dataverse.pid.*.shoulder +^^^^^^^^^^^^^^^^^^^^^^^^ + +In general, PIDs are of the form ``:/*`` where ``*`` is the portion unique to an individual PID. PID Providers must define +the authority and shoulder (with the protocol defined by the ``dataverse.pid.*.type`` setting) that define the set of existing PIDs they can manage and the prefix they can use when minting new PIDs. +(Often an account with a PID service provider will be limited to using a single authority/shoulder. If your PID service provider account allows more than one combination that you wish to use in Dataverse, configure multiple PID Providers, one for each combination.) + +.. _dataverse.pid.*.identifier-generation-style: + +dataverse.pid.*.identifier-generation-style +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, PID Providers in Dataverse generate a random 6 character string, +pre-pended by the Shoulder if set, to use as the identifier for a Dataset. +Set this to ``storedProcGenerated`` to generate instead a custom *unique* +identifier (again pre-pended by the Shoulder if set) through a database +stored procedure or function (the assumed default setting is ``randomString``). +When using the ``storedProcGenerated`` setting, a stored procedure or function must be created in +the database. + +As a first example, the script below (downloadable +:download:`here `) produces +sequential numerical values. You may need to make some changes to suit your +system setup, see the comments for more information: + +.. literalinclude:: ../_static/util/createsequence.sql + :language: plpgsql + +As a second example, the script below (downloadable +:download:`here `) produces +sequential 8 character identifiers from a base36 representation of current +timestamp. + +.. literalinclude:: ../_static/util/identifier_from_timestamp.sql + :language: plpgsql + +Note that the SQL in these example scripts is Postgres-specific. +If necessary, it can be reimplemented in any other SQL flavor - the standard +JPA code in the application simply expects the database to have a saved +function ("stored procedure") named ``generateIdentifierFromStoredProcedure()`` +returning a single ``varchar`` argument. + +Please note that this setting interacts with the ``dataverse.pid.*.datafile-pid-format`` +setting below to determine how datafile identifiers are generated. + + +.. _dataverse.pid.*.datafile-pid-format: + +dataverse.pid.*.datafile-pid-format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This setting controls the way that the "identifier" component of a file's +persistent identifier (PID) relates to the PID of its "parent" dataset - for a given PID Provider. + +By default the identifier for a file is dependent on its parent dataset. +For example, if the identifier of a dataset is "TJCLKP", the identifier for +a file within that dataset will consist of the parent dataset's identifier +followed by a slash ("/"), followed by a random 6 character string, +yielding "TJCLKP/MLGWJO".
Identifiers in this format are what you should +expect if you leave ``dataverse.pid.*.datafile-pid-format`` undefined or set it to +``DEPENDENT`` and have not changed the ``dataverse.pid.*.identifier-generation-style`` +setting from its default. + +Alternatively, the identifier for File PIDs can be configured to be +independent of Dataset PIDs using the setting ``INDEPENDENT``. +In this case, file PIDs will not contain the PIDs of their parent datasets, +and their PIDs will be generated the exact same way that datasets' PIDs are, +based on the ``dataverse.pid.*.identifier-generation-style`` setting described above +(random 6 character strings or custom unique identifiers through a stored +procedure, pre-pended by any shoulder). + +The chart below shows examples from each possible combination of parameters +from the two settings. ``dataverse.pid.*.identifier-generation-style`` can be either +``randomString`` (the default) or ``storedProcGenerated`` and +``dataverse.pid.*.datafile-pid-format`` can be either ``DEPENDENT`` (the default) or +``INDEPENDENT``. In the examples below the "identifier" for the dataset is +"TJCLKP" for ``randomString`` and "100001" for ``storedProcGenerated`` (when +using sequential numerical values, as described in +:ref:`dataverse.pid.*.identifier-generation-style` above), or "krby26qt" for +``storedProcGenerated`` (when using base36 timestamps, as described in +:ref:`dataverse.pid.*.identifier-generation-style` above). + ++-----------------+---------------+----------------------+---------------------+ +| | randomString | storedProcGenerated | storedProcGenerated | +| | | | | +| | | (sequential numbers) | (base36 timestamps) | ++=================+===============+======================+=====================+ +| **DEPENDENT** | TJCLKP/MLGWJO | 100001/1 | krby26qt/1 | ++-----------------+---------------+----------------------+---------------------+ +| **INDEPENDENT** | MLGWJO | 100002 | krby27pz | ++-----------------+---------------+----------------------+---------------------+ + +As seen above, in cases where ``dataverse.pid.*.identifier-generation-style`` is set to +``storedProcGenerated`` and ``dataverse.pid.*.datafile-pid-format`` is set to ``DEPENDENT``, +each file within a dataset will be assigned a number *within* that dataset +starting with "1". + +Otherwise, if ``dataverse.pid.*.datafile-pid-format`` is set to ``INDEPENDENT``, each file +within the dataset is assigned with a new PID which is the next available +identifier provided from the database stored procedure. In our example: +"100002" when using sequential numbers or "krby27pz" when using base36 +timestamps. + +.. _dataverse.pid.*.managed-list: + +dataverse.pid.*.managed-list +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. _dataverse.pid.*.excluded-list: + +dataverse.pid.*.excluded-list +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +With at least some PID services, it is possible for the authority(permission) to manage specific individual PIDs +to be transferred between accounts. To handle these cases, the individual PIDs, written in the +standard format, e.g. doi:10.5072/FK2ABCDEF can be added to the comma-separated ``managed`` or ``excluded`` list +for a given provider. For entries on the ``managed- list``, Dataverse will assume this PID +Provider/account can update the metadata and landing URL for the PID at the service provider +(even though it does not match the provider's authority/shoulder settings). 
Conversely, +Dataverse will assume that PIDs on the ``excluded-list`` cannot be managed/updated by this provider +(even though they match the provider's authority/shoulder settings). These settings are optional +with the default assumption that these lists are empty. + +.. _dataverse.pid.*.datacite: + +DataCite-specific Settings +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +dataverse.pid.*.datacite.mds-api-url +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +dataverse.pid.*.datacite.rest-api-url +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +dataverse.pid.*.datacite.username +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +dataverse.pid.*.datacite.password +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +PID Providers of type ``datacite`` require four additional parameters that define how the provider connects to DataCite. +DataCite has two APIs that are used in Dataverse: + +The base URL of the `DataCite MDS API `_, +used to mint and manage DOIs. Current valid values for ``dataverse.pid.*.datacite.mds-api-url`` are "https://mds.datacite.org" (production) and "https://mds.test.datacite.org" (testing, the default). + +The `DataCite REST API `_ is also used - :ref:`PIDs API ` information retrieval and :doc:`/admin/make-data-count`. +Current valid values for ``dataverse.pid.*.datacite.rest-api-url`` are "https://api.datacite.org" (production) and "https://api.test.datacite.org" (testing, the default). + +DataCite uses `HTTP Basic authentication `_ +for `Fabrica `_ and their APIs. You need to provide +the same credentials (``username``, ``password``) to Dataverse software to mint and manage DOIs for you. +As noted above, you should use one of the more secure options for setting the password. + + +.. _dataverse.pid.*.ezid: + +EZId-specific Settings +^^^^^^^^^^^^^^^^^^^^^^ + +dataverse.pid.*.ezid.api-url +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +dataverse.pid.*.ezid.username +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +dataverse.pid.*.ezid.password +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Note that use of `EZId `_ is limited primarily to University of California institutions. If you have an EZId account, +you will need to configure the ``api-url`` and your account ``username`` and ``password``. As above, you should use one of the more secure +options for setting the password. + +.. _dataverse.pid.*.permalink: + +PermaLink-specific Settings +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +dataverse.pid.*.permalink.base-url +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +dataverse.pid.*.permalink.separator +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +PermaLinks are a simple PID option intended for intranet and catalog use cases. They can be used without an external service or +be configured with the ``base-url`` of a resolution service. PermaLinks also allow a custom ``separator`` to be used. (Note: when using multiple +PermaLink providers, you should avoid ambiguous authority/separator/shoulder combinations that would result in the same overall prefix.) + +.. 
_dataverse.pid.*.handlenet: + +Handle-specific Settings +^^^^^^^^^^^^^^^^^^^^^^^^ + +dataverse.pid.*.handlenet.index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +dataverse.pid.*.handlenet.independent-service +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +dataverse.pid.*.handlenet.auth-handle +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +dataverse.pid.*.handlenet.key +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +dataverse.pid.*.handlenet.path +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +dataverse.pid.*.handlenet.passphrase +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Note: If you are **minting your own handles** and plan to set up your own handle service, please refer to `Handle.Net documentation `_. + +Configure your Handle.net ``index`` to be used registering new persistent +identifiers. Defaults to ``300``. + +Indices are used to separate concerns within the Handle system. To add data to +an index, authentication is mandatory. See also chapter 1.4 "Authentication" of +the `Handle.Net Technical Documentation `__ + +Handle.Net servers use a public key authentication method where the public key +is stored in a handle itself and the matching private key is provided from this +file. Typically, the absolute path ends like ``handle/svr_1/admpriv.bin``. +The key file may (and should) be encrypted with a passphrase (used for +encryption with AES-128). See +also chapter 1.4 "Authentication" of the `Handle.Net Technical Documentation +`__ + +Provide an absolute ``key.path`` to a private key file authenticating requests to your +Handle.Net server. + +Provide a ``key.passphrase`` to decrypt the private key file at ``dataverse.pid.*.handlenet.key.path``. + +Set ``independent-service`` to true if you want to use a Handle service which is setup to work 'independently' (No communication with the Global Handle Registry). +By default this setting is false. + +Set ``auth-handle`` to / to be used on a global handle service when the public key is NOT stored in the default handle. +This setting is optional. If the public key is, for instance, stored in handle: ``21.T12996/USER01``, ``auth-handle`` should be set to this value. + .. _pids-doi-configuration: -Configuring Your Dataverse Installation for DOIs -++++++++++++++++++++++++++++++++++++++++++++++++ +Backward-compatibility for Single PID Provider Installations +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -As explained above, by default your Dataverse installation attempts to register DOIs for each -dataset and file under a test authority. You must apply for your own credentials. +While using the PID Provider configuration settings described above is recommended, Dataverse installations +only using a single PID Provider can use the settings below instead. In general, these legacy settings mirror +those above except for not including a PID Provider id. -Here are the configuration options for DOIs: +Configuring Your Dataverse Installation for a Single DOI Provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Here are the configuration options for DOIs.: **JVM Options for DataCite:** @@ -258,8 +584,8 @@ this provider. .. _pids-handle-configuration: -Configuring Your Dataverse Installation for Handles -+++++++++++++++++++++++++++++++++++++++++++++++++++ +Configuring Your Dataverse Installation for a Single Handle Provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Here are the configuration options for handles. Most notably, you need to change the ``:Protocol`` setting, as it defaults to DOI usage. 
@@ -279,16 +605,12 @@ change the ``:Protocol`` setting, as it defaults to DOI usage. - :ref:`:IndependentHandleService <:IndependentHandleService>` (optional) - :ref:`:HandleAuthHandle <:HandleAuthHandle>` (optional) -Note: If you are **minting your own handles** and plan to set up your own handle service, please refer to `Handle.Net documentation `_. +Note: If you are **minting your own handles** and plan to set up your own handle service, please refer to `Handle.Net documentation `_. .. _permalinks: -Configuring Your Dataverse Installation for PermaLinks -++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -PermaLinks are a simple mechanism to provide persistent URLs for datasets and datafiles (if configured) that does not involve an external service providing metadata-based search services. -They are potentially appropriate for Intranet use cases as well as in cases where Dataverse is being used as a catalog or holding duplicate copies of datasets where the authoritative copy already has a DOI or Handle. -PermaLinks use the protocol "perma" (versus "doi" or "handle") and do not use a "/" character as a separator between the authority and shoulder. It is recommended to choose an alphanumeric value for authority that does not resemble that of DOIs (which are primarily numeric and start with "10." as in "10.5072") to avoid PermaLinks being mistaken for DOIs. +Configuring Your Dataverse Installation for a Single PermaLink Provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Here are the configuration options for PermaLinks: @@ -305,6 +627,8 @@ Here are the configuration options for PermaLinks: - :ref:`:DataFilePIDFormat <:DataFilePIDFormat>` (optional) - :ref:`:FilePIDsEnabled <:FilePIDsEnabled>` (optional, defaults to false) +You must restart Payara after making changes to these settings. + .. _auth-modes: Auth Modes: Local vs. Remote vs. Both @@ -498,14 +822,18 @@ Logging & Slow Performance .. _file-storage: -File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores ------------------------------------------------------------------------------------------------------ +File Storage +------------ By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara6/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. -A Dataverse installation can alternately store files in a Swift or S3-compatible object store, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. +A Dataverse installation can alternately store files in a Swift or S3-compatible object store, or on a Globus endpoint, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. + +A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a web or Globus accessible trusted remote store. -A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a web-accessible trusted remote store. 
+A Dataverse installation can be configured to allow out of band upload by setting the ``dataverse.files.\.upload-out-of-band`` JVM option to ``true``. +By default, Dataverse supports uploading files via the :ref:`add-file-api`. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the :ref:`Adding the Uploaded file to the Dataset ` API call (described in the :doc:`/developers/s3-direct-upload-api` page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. The following sections describe how to set up various types of stores and how to configure for multiple stores. @@ -534,6 +862,27 @@ If you wish to change which store is used by default, you'll need to delete the It is also possible to set maximum file upload size limits per store. See the :ref:`:MaxFileUploadSizeInBytes` setting below. +.. _labels-file-stores: + +Labels for File Stores +++++++++++++++++++++++ + +If you find yourself adding many file stores with various configurations such as per-file limits and direct upload, you might find it helpful to make the label descriptive. + +For example, instead of simply labeling an S3 store as "S3"... + +.. code-block:: none + + ./asadmin create-jvm-options "\-Ddataverse.files.s3xl.label=S3" + +... you might want to include some extra information such as the example below. + +.. code-block:: none + + ./asadmin create-jvm-options "\-Ddataverse.files.s3xl.label=S3XL, Filesize limit: 100GB, direct-upload" + +Please keep in mind that the UI will only show so many characters, so labels are best kept short. + .. _storage-files-dir: File Storage @@ -550,7 +899,7 @@ Multiple file stores should specify different directories (which would nominally Swift Storage +++++++++++++ -Rather than storing data files on the filesystem, you can opt for an experimental setup with a `Swift Object Storage `_ backend. Each dataset that users create gets a corresponding "container" on the Swift side, and each data file is saved as a file within that container. +Rather than storing data files on the filesystem, you can opt for an experimental setup with a `Swift Object Storage `_ backend. Each dataset that users create gets a corresponding "container" on the Swift side, and each data file is saved as a file within that container. **In order to configure a Swift installation,** you need to complete these steps to properly modify the JVM options: @@ -566,7 +915,7 @@ First, run all the following create commands with your Swift endpoint informatio ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.files..username.endpoint1=your-username" ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.files..endpoint.endpoint1=your-swift-endpoint" -``auth_type`` can either be ``keystone``, ``keystone_v3``, or it will assumed to be ``basic``. ``auth_url`` should be your keystone authentication URL which includes the tokens (e.g. for keystone, ``https://openstack.example.edu:35357/v2.0/tokens`` and for keystone_v3, ``https://openstack.example.edu:35357/v3/auth/tokens``). ``swift_endpoint`` is a URL that looks something like ``http://rdgw.swift.example.org/swift/v1``. +``auth_type`` can either be ``keystone``, ``keystone_v3``, or it will assumed to be ``basic``. 
``auth_url`` should be your keystone authentication URL which includes the tokens (e.g. for keystone, ``https://openstack.example.edu:35357/v2.0/tokens`` and for keystone_v3, ``https://openstack.example.edu:35357/v3/auth/tokens``). ``swift_endpoint`` is a URL that looks something like ``https://rdgw.swift.example.org/swift/v1``. Then create a password alias by running (without changes): @@ -662,7 +1011,7 @@ You'll need an AWS account with an associated S3 bucket for your installation to **Make note** of the **bucket's name** and the **region** its data is hosted in. To **create a user** with full S3 access and nothing more for security reasons, we recommend using IAM -(Identity and Access Management). See `IAM User Guide `_ +(Identity and Access Management). See `IAM User Guide `_ for more info on this process. To use programmatic access, **Generate the user keys** needed for a Dataverse installation afterwards by clicking on the created user. @@ -733,7 +1082,7 @@ Additional profiles can be added to these files by appending the relevant inform aws_access_key_id = aws_secret_access_key = -Place these two files in a folder named ``.aws`` under the home directory for the user running your Dataverse Installation on Payara. (From the `AWS Command Line Interface Documentation `_: +Place these two files in a folder named ``.aws`` under the home directory for the user running your Dataverse Installation on Payara. (From the `AWS Command Line Interface Documentation `_: "In order to separate credentials from less sensitive options, region and output format are stored in a separate file named config in the same folder") @@ -799,27 +1148,28 @@ List of S3 Storage Options .. table:: :align: left - =========================================== ================== ========================================================================== ============= - JVM Option Value Description Default value - =========================================== ================== ========================================================================== ============= - dataverse.files.storage-driver-id Enable as the default storage driver. ``file`` - dataverse.files..type ``s3`` **Required** to mark this storage as S3 based. (none) - dataverse.files..label **Required** label to be shown in the UI for this storage (none) - dataverse.files..bucket-name The bucket name. See above. (none) - dataverse.files..download-redirect ``true``/``false`` Enable direct download or proxy through Dataverse. ``false`` - dataverse.files..upload-redirect ``true``/``false`` Enable direct upload of files added to a dataset to the S3 store. ``false`` - dataverse.files..ingestsizelimit Maximum size of directupload files that should be ingested (none) - dataverse.files..url-expiration-minutes If direct uploads/downloads: time until links expire. Optional. 60 - dataverse.files..min-part-size Multipart direct uploads will occur for files larger than this. Optional. ``1024**3`` - dataverse.files..custom-endpoint-url Use custom S3 endpoint. Needs URL either with or without protocol. (none) - dataverse.files..custom-endpoint-region Only used when using custom endpoint. Optional. ``dataverse`` - dataverse.files..profile Allows the use of AWS profiles for storage spanning multiple AWS accounts. (none) - dataverse.files..proxy-url URL of a proxy protecting the S3 store. Optional. (none) - dataverse.files..path-style-access ``true``/``false`` Use path style buckets instead of subdomains. Optional. 
``false`` - dataverse.files..payload-signing ``true``/``false`` Enable payload signing. Optional ``false`` - dataverse.files..chunked-encoding ``true``/``false`` Disable chunked encoding. Optional ``true`` - dataverse.files..connection-pool-size The maximum number of open connections to the S3 server ``256`` - =========================================== ================== ========================================================================== ============= + =========================================== ================== =================================================================================== ============= + JVM Option Value Description Default value + =========================================== ================== =================================================================================== ============= + dataverse.files.storage-driver-id Enable as the default storage driver. ``file`` + dataverse.files..type ``s3`` **Required** to mark this storage as S3 based. (none) + dataverse.files..label **Required** label to be shown in the UI for this storage (none) + dataverse.files..bucket-name The bucket name. See above. (none) + dataverse.files..download-redirect ``true``/``false`` Enable direct download or proxy through Dataverse. ``false`` + dataverse.files..upload-redirect ``true``/``false`` Enable direct upload of files added to a dataset in the S3 store. ``false`` + dataverse.files..upload-out-of-band ``true``/``false`` Allow upload of files by out-of-band methods (using some tool other than Dataverse) ``false`` + dataverse.files..ingestsizelimit Maximum size of directupload files that should be ingested (none) + dataverse.files..url-expiration-minutes If direct uploads/downloads: time until links expire. Optional. 60 + dataverse.files..min-part-size Multipart direct uploads will occur for files larger than this. Optional. ``1024**3`` + dataverse.files..custom-endpoint-url Use custom S3 endpoint. Needs URL either with or without protocol. (none) + dataverse.files..custom-endpoint-region Only used when using custom endpoint. Optional. ``dataverse`` + dataverse.files..profile Allows the use of AWS profiles for storage spanning multiple AWS accounts. (none) + dataverse.files..proxy-url URL of a proxy protecting the S3 store. Optional. (none) + dataverse.files..path-style-access ``true``/``false`` Use path style buckets instead of subdomains. Optional. ``false`` + dataverse.files..payload-signing ``true``/``false`` Enable payload signing. Optional ``false`` + dataverse.files..chunked-encoding ``true``/``false`` Disable chunked encoding. Optional ``true`` + dataverse.files..connection-pool-size The maximum number of open connections to the S3 server ``256`` + =========================================== ================== =================================================================================== ============= .. table:: :align: left @@ -859,7 +1209,7 @@ You may provide the values for these via any `supported MicroProfile Config API Reported Working S3-Compatible Storage ###################################### -`Minio v2018-09-12 `_ +`Minio v2018-09-12 `_ Set ``dataverse.files..path-style-access=true``, as Minio works path-based. Works pretty smooth, easy to setup. **Can be used for quick testing, too:** just use the example values above. Uses the public (read: unsecure and possibly slow) https://play.minio.io:9000 service. 
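+
+For a quick test against the public play.minio.io service mentioned above, a store definition might look like the following sketch. This is illustrative only: the store id ``minio`` and the bucket name ``mybucket`` are placeholders, and credentials would still need to be supplied via an AWS credentials profile (or the corresponding MicroProfile settings) as described earlier.
+
+.. code-block:: none
+
+    ./asadmin create-jvm-options "\-Ddataverse.files.minio.type=s3"
+    ./asadmin create-jvm-options "\-Ddataverse.files.minio.label=Minio"
+    ./asadmin create-jvm-options "\-Ddataverse.files.minio.bucket-name=mybucket"
+    ./asadmin create-jvm-options "\-Ddataverse.files.minio.custom-endpoint-url=https://play.minio.io:9000"
+    ./asadmin create-jvm-options "\-Ddataverse.files.minio.path-style-access=true"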
@@ -952,7 +1302,7 @@ Once you have configured a trusted remote store, you can point your users to the dataverse.files..type ``remote`` **Required** to mark this storage as remote. (none) dataverse.files..label **Required** label to be shown in the UI for this storage. (none) dataverse.files..base-url **Required** All files must have URLs of the form /* . (none) - dataverse.files..base-store **Optional** The id of a base store (of type file, s3, or swift). (the default store) + dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (the default store) dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` dataverse.files..secret-key A key used to sign download requests sent to the remote store. Optional. (none) dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 @@ -961,6 +1311,47 @@ Once you have configured a trusted remote store, you can point your users to the =========================================== ================== ========================================================================== =================== +.. _globus-storage: + +Globus Storage +++++++++++++++ + +Globus stores allow Dataverse to manage files stored in Globus endpoints or to reference files in remote Globus endpoints, with users leveraging Globus to transfer files to/from Dataverse (rather than using HTTP/HTTPS). +See :doc:`/developers/big-data-support` for additional information on how to use a Globus store. Consult the `Globus documentation `_ for information about using Globus and configuring Globus endpoints. + +In addition to having the type "globus" and requiring a label, Globus Stores share many options with Trusted Remote Stores, along with options for specifying and accessing one or more Globus endpoints. As with Remote Stores, Globus Stores also use a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.). +These and other available options are described in the table below. + +There are two types of Globus stores: + +- managed - where Dataverse manages the Globus endpoint, deciding where transferred files are stored and managing access control for users transferring files to/from Dataverse +- remote - where Dataverse references files that remain on trusted remote Globus endpoints + +A managed Globus store connects to a standard/file-based Globus endpoint. It is also possible to configure an S3 store as a managed store, if the managed endpoint uses an underlying S3 store via the Globus S3 Connector. +With the former, Dataverse has no direct access to the file contents, and functionality related to ingest, fixity hash validation, etc. is not available. With the latter, Dataverse can access files internally via S3 and the functionality supported is similar to that when using S3 direct upload. + +Once you have configured a Globus store, or configured an S3 store for Globus access, it is recommended that you install the `dataverse-globus app `_ to allow transfers in/out of Dataverse to be initiated via the Dataverse user interface. Alternatively, you can point your users to the :doc:`/developers/globus-api` for information about API support. + +..
table:: + :align: left + + ======================================================= ================== ========================================================================== =================== + JVM Option Value Description Default value + ======================================================= ================== ========================================================================== =================== + dataverse.files..type ``globus`` **Required** to mark this storage as globus enabled. (none) + dataverse.files..label **Required** label to be shown in the UI for this storage. (none) + dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (the default store) + dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) + dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) + dataverse.files..managed ``true``/``false`` Whether dataverse manages an associated Globus endpoint ``false`` + dataverse.files..transfer-endpoint-with-basepath The *managed* Globus endpoint id and associated base path for file storage (none) + dataverse.files..globus-token A Globus token (base64 endcoded : + for a managed store) - using a microprofile alias is recommended (none) + dataverse.files..reference-endpoints-with-basepaths A comma separated list of *remote* trusted Globus endpoint id/s (none) + dataverse.files..files-not-accessible-by-dataverse ``true``/``false`` Should be false for S3 Connector-based *managed* stores, true for others ``false`` + + ======================================================= ================== ========================================================================== =================== + .. _temporary-file-storage: Temporary Upload File Storage @@ -1270,6 +1661,8 @@ The list below depicts a set of tools that can be used to ease the amount of wor - `easyTranslationHelper `_, a tool developed by `University of Aveiro `_. +- `Dataverse General User Interface Translation Guide for Weblate `_, a guide produced as part of the `SSHOC Dataverse Translation `_ event. + .. _Web-Analytics-Code: Web Analytics Code @@ -1425,24 +1818,25 @@ BagIt file handler configuration settings: BagIt Export ------------ -Your Dataverse installation may be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ archival Bags (sometimes called BagPacks) to `Chronopolis `_ via `DuraCloud `_ or alternately to any folder on the local filesystem. +Your Dataverse installation may be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ archival Bags (sometimes called BagPacks) to one of several supported storage services. +Supported services include `Chronopolis `_ via `DuraCloud `_, Google's Cloud, and any service that can provide an S3 interface or handle files transferred to a folder on the local filesystem. -These archival Bags include all of the files and metadata in a given dataset version and are sufficient to recreate the dataset, e.g. in a new Dataverse instance, or postentially in another RDA-conformant repository. +These archival Bags include all of the files and metadata in a given dataset version and are sufficient to recreate the dataset, e.g. in a new Dataverse instance, or potentially in another RDA-conformant repository. 
The `DVUploader `_ includes functionality to recreate a Dataset from an archival Bag produced by Dataverse. (Note that this functionality is distinct from the :ref:`BagIt File Handler` upload files to an existing Dataset via the Dataverse user interface.) The Dataverse Software offers an internal archive workflow which may be configured as a PostPublication workflow via an admin API call to manually submit previously published Datasets and prior versions to a configured archive such as Chronopolis. The workflow creates a `JSON-LD `_ serialized `OAI-ORE `_ map file, which is also available as a metadata export format in the Dataverse Software web interface. At present, archiving classes include the DuraCloudSubmitToArchiveCommand, LocalSubmitToArchiveCommand, GoogleCloudSubmitToArchive, and S3SubmitToArchiveCommand , which all extend the AbstractSubmitToArchiveCommand and use the configurable mechanisms discussed below. (A DRSSubmitToArchiveCommand, which works with Harvard's DRS also exists and, while specific to DRS, is a useful example of how Archivers can support single-version-only semantics and support archiving only from specified collections (with collection specific parameters)). -All current options support the archival status APIs and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). +All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). .. _Duracloud Configuration: Duracloud Configuration +++++++++++++++++++++++ -Also note that while the current Chronopolis implementation generates the archival Bag and submits it to the archive's DuraCloud interface, the step to make a 'snapshot' of the space containing the archival Bag (and verify it's successful submission) are actions a curator must take in the DuraCloud interface. +The current Chronopolis implementation generates the archival Bag and submits it to the archive's DuraCloud interface. The steps to make a 'snapshot' of the space containing the archival Bag (and to verify its successful submission) are actions a curator must take in the DuraCloud interface. -The minimal configuration to support an archiver integration involves adding a minimum of two Dataverse Software Keys and any required Payara jvm options. The example instructions here are specific to the DuraCloud Archiver\: +The minimal configuration to support archiver integration involves adding a minimum of two Dataverse Software settings. Individual archivers may require additional settings, Payara JVM options, and/or MicroProfile settings. The example instructions here are specific to the DuraCloud Archiver\: \:ArchiverClassName - the fully qualified class to be used for archiving. For example: @@ -1452,7 +1846,7 @@ The minimal configuration to support an archiver integration involves adding a m ``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":DuraCloudHost, :DuraCloudPort, :DuraCloudContext, :BagGeneratorThreads"`` -The DPN archiver defines three custom settings, one of which is required (the others have defaults): +The DuraCloud archiver defines three custom settings, one of which is required (the others have defaults): \:DuraCloudHost - the URL for your organization's Duracloud site.
For example: @@ -1599,6 +1993,25 @@ The workflow id returned in this call (or available by doing a GET of /api/admin Once these steps are taken, new publication requests will automatically trigger submission of an archival copy to the specified archiver, Chronopolis' DuraCloud component in this example. For Chronopolis, as when using the API, it is currently the admin's responsibility to snap-shot the DuraCloud space and monitor the result. Failure of the workflow, (e.g. if DuraCloud is unavailable, the configuration is wrong, or the space for this dataset already exists due to a prior publication action or use of the API), will create a failure message but will not affect publication itself. +.. _bag-info.txt: + +Configuring bag-info.txt +++++++++++++++++++++++++ + +Out of the box, placeholder values like below will be placed in bag-info.txt: + +.. code-block:: text + + Source-Organization: Dataverse Installation () + Organization-Address: + Organization-Email: + +To customize these values for your institution, use the following JVM options: + +- :ref:`dataverse.bagit.sourceorg.name` +- :ref:`dataverse.bagit.sourceorg.address` +- :ref:`dataverse.bagit.sourceorg.email` + Going Live: Launching Your Production Deployment ------------------------------------------------ @@ -1692,6 +2105,11 @@ When changing values these values with ``asadmin``, you'll need to delete the ol It's also possible to change these values by stopping Payara, editing ``payara6/glassfish/domains/domain1/config/domain.xml``, and restarting Payara. +In addition, JVM options enabled for "MicroProfile Config" (see docs of any option), can be used with any +`supported MicroProfile Config API source`_ to provide their values. The most notable source are environment variables; +many examples are given in detail documentation of enabled options. + + .. _dataverse.fqdn: dataverse.fqdn @@ -1759,8 +2177,8 @@ protocol, host, and port number and should not include a trailing slash. dataverse.files.directory +++++++++++++++++++++++++ -Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number -of purposes: +Providing an explicit location here makes it easier to reuse some mounted filesystem and we recommend doing so +to avoid filled up disks, aid in performance, etc. This directory is used for a number of purposes: 1. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before shipping to the final storage destination. @@ -1773,24 +2191,51 @@ of purposes: under certain conditions. This directory may also be used by file stores for :ref:`permanent file storage `, but this is controlled by other, store-specific settings. -Defaults to ``/tmp/dataverse``. Can also be set via *MicroProfile Config API* sources, e.g. the environment variable -``DATAVERSE_FILES_DIRECTORY``. Defaults to ``${STORAGE_DIR}`` for profile ``ct``, important for the -:ref:`Dataverse Application Image `. +Notes: + +- Please provide an absolute path to a directory backed by some mounted file system. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_DIRECTORY``. +- Defaults to ``/tmp/dataverse`` in a :doc:`default installation `. +- Defaults to ``${STORAGE_DIR}`` using our :ref:`Dataverse container ` (resolving to ``/dv``). +- During startup, this directory will be checked for existence and write access. It will be created for you + if missing. 
If it cannot be created or does not have proper write access, application deployment will fail. .. _dataverse.files.uploads: dataverse.files.uploads +++++++++++++++++++++++ -Configure a folder to store the incoming file stream during uploads (before transfering to `${dataverse.files.directory}/temp`). +Configure a folder to store the incoming file stream during uploads (before transfering to ``${dataverse.files.directory}/temp``). +Providing an explicit location here makes it easier to reuse some mounted filesystem. Please also see :ref:`temporary-file-storage` for more details. -You can use an absolute path or a relative, which is relative to the application server domain directory. -Defaults to ``./uploads``, which resolves to ``/usr/local/payara6/glassfish/domains/domain1/uploads`` in a default -installation. +Notes: + +- Please provide an absolute path to a directory backed by some mounted file system. +- Defaults to ``${com.sun.aas.instanceRoot}/uploads`` in a :doc:`default installation ` + (resolving to ``/usr/local/payara6/glassfish/domains/domain1/uploads``). +- Defaults to ``${STORAGE_DIR}/uploads`` using our :ref:`Dataverse container ` (resolving to ``/dv/uploads``). +- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_UPLOADS``. +- During startup, this directory will be checked for existence and write access. It will be created for you + if missing. If it cannot be created or does not have proper write access, application deployment will fail. -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_UPLOADS``. -Defaults to ``${STORAGE_DIR}/uploads`` for profile ``ct``, important for the :ref:`Dataverse Application Image `. +.. _dataverse.files.docroot: + +dataverse.files.docroot ++++++++++++++++++++++++ + +Configure a folder to store and retrieve additional materials like user uploaded collection logos, generated sitemaps, +and so on. Providing an explicit location here makes it easier to reuse some mounted filesystem. +See also logo customization above. + +Notes: + +- Defaults to ``${com.sun.aas.instanceRoot}/docroot`` in a :doc:`default installation ` + (resolves to ``/usr/local/payara6/glassfish/domains/domain1/docroot``). +- Defaults to ``${STORAGE_DIR}/docroot`` using our :ref:`Dataverse container ` (resolving to ``/dv/docroot``). +- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_DOCROOT``. +- During startup, this directory will be checked for existence and write access. It will be created for you + if missing. If it cannot be created or does not have proper write access, application deployment will fail. dataverse.auth.password-reset-timeout-in-minutes ++++++++++++++++++++++++++++++++++++++++++++++++ @@ -1992,8 +2437,8 @@ For limiting the size (in bytes) of thumbnail images generated from files. The d .. _dataverse.pid.datacite.mds-api-url: -dataverse.pid.datacite.mds-api-url -++++++++++++++++++++++++++++++++++ +Legacy Single PID Provider: dataverse.pid.datacite.mds-api-url +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Configure the base URL of the `DataCite MDS API `_, used to mint and manage DOIs. Valid values are "https://mds.datacite.org" and "https://mds.test.datacite.org" @@ -2026,8 +2471,8 @@ Without setting an option, always defaults to testing API endpoint. .. 
_dataverse.pid.datacite.rest-api-url: -dataverse.pid.datacite.rest-api-url -+++++++++++++++++++++++++++++++++++ +Legacy Single PID Provider: dataverse.pid.datacite.rest-api-url ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Configure the base URL endpoint of the `DataCite REST API `_, used for :ref:`PIDs API ` information retrieval and :doc:`/admin/make-data-count`. @@ -2054,8 +2499,8 @@ you can issue the following command: .. _dataverse.pid.datacite.username: -dataverse.pid.datacite.username -+++++++++++++++++++++++++++++++ +Legacy Single PID Provider: dataverse.pid.datacite.username ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ DataCite uses `HTTP Basic authentication `_ for `Fabrica `_ and their APIs. You need to provide @@ -2076,8 +2521,8 @@ Once you have a username from DataCite, you can enter it like this: .. _dataverse.pid.datacite.password: -dataverse.pid.datacite.password -+++++++++++++++++++++++++++++++ +Legacy Single PID Provider: dataverse.pid.datacite.password ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Once you have a password from your provider, you should create a password alias. This avoids storing it in clear text, although you could use a JVM option `to reference @@ -2103,8 +2548,8 @@ To manage these, read up on `Payara docs about password aliases `. @@ -2125,8 +2570,8 @@ and re-add it. .. _dataverse.pid.handlenet.key.passphrase: -dataverse.pid.handlenet.key.passphrase -++++++++++++++++++++++++++++++++++++++ +Legacy Single PID Provider: dataverse.pid.handlenet.key.passphrase +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Related to :ref:`Handle.Net PID provider usage `. @@ -2146,8 +2591,8 @@ the old JVM option and the wrapped password alias, then recreate as shown for .. _dataverse.pid.handlenet.index: -dataverse.pid.handlenet.index -+++++++++++++++++++++++++++++ +Legacy Single PID Provider: dataverse.pid.handlenet.index ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Related to :ref:`Handle.Net PID provider usage `. @@ -2165,8 +2610,8 @@ re-add it. .. _dataverse.pid.permalink.base-url: -dataverse.pid.permalink.base-url -++++++++++++++++++++++++++++++++ +Legacy Single PID Provider: dataverse.pid.permalink.base-url +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ When using :ref:`PermaLinks `, this setting can be used to configure an external resolver. Dataverse will associate a PermaLink PID with the URL: ``/citation?persistentId=perma:``. The default value is your Dataverse site URL, which will result in PermaLinks correctly resolving to the appropriate dataset page. @@ -2187,8 +2632,8 @@ variable ``DATAVERSE_PID_PERMALINK_BASE_URL``. This setting was formerly known a .. _dataverse.pid.ezid.api-url: -dataverse.pid.ezid.api-url -++++++++++++++++++++++++++ +Legacy Single PID Provider: dataverse.pid.ezid.api-url +++++++++++++++++++++++++++++++++++++++++++++++++++++++ The EZID DOI provider is likely not an option if you are `not associated with California Digital Library (CDL) or Purdue University @@ -2202,8 +2647,8 @@ variable ``DATAVERSE_PID_EZID_API_URL``. This setting was formerly known as .. _dataverse.pid.ezid.username: -dataverse.pid.ezid.username -+++++++++++++++++++++++++++ +Legacy Single PID Provider: dataverse.pid.ezid.username ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ The EZID DOI provider is likely not an option if you are `not associated with California Digital Library (CDL) or Purdue University @@ -2220,8 +2665,8 @@ should delete and re-add it. .. 
_dataverse.pid.ezid.password: -dataverse.pid.ezid.password -+++++++++++++++++++++++++++ +Legacy Single PID Provider: dataverse.pid.ezid.password ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ The EZID DOI provider is likely not an option if you are `not associated with California Digital Library (CDL) or Purdue University @@ -2448,6 +2893,57 @@ This setting was added to keep S3 direct upload lightweight. When that feature i See also :ref:`s3-direct-upload-features-disabled`. +.. _dataverse.storageuse.disable-storageuse-increments: + +dataverse.storageuse.disable-storageuse-increments +++++++++++++++++++++++++++++++++++++++++++++++++++ + +This setting serves the role of an emergency "kill switch" that will disable maintaining the real time record of storage use for all the datasets and collections in the database. Because of the experimental nature of this feature (see :doc:`/admin/collectionquotas`) that hasn't been used in production setting as of this release, v6.1 this setting is provided in case these updates start causing database race conditions and conflicts on a busy server. + +dataverse.auth.oidc.* ++++++++++++++++++++++ + +Provision a single :doc:`OpenID Connect authentication provider ` using MicroProfile Config. You can find a list of +all available options at :ref:`oidc-mpconfig`. + +.. _dataverse.files.guestbook-at-request: + +dataverse.files.guestbook-at-request +++++++++++++++++++++++++++++++++++++ + +This setting enables functionality to allow guestbooks to be displayed when a user requests access to a restricted data file(s) or when a file is downloaded (the historic default). Providing a true/false value for this setting enables the functionality and provides a global default. The behavior can also be changed at the collection level via the user interface and by a superuser for a give dataset using the API. + +See also :ref:`guestbook-at-request-api` in the API Guide, and . + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_GUESTBOOK_AT_REQUEST``. + +.. _dataverse.bagit.sourceorg.name: + +dataverse.bagit.sourceorg.name +++++++++++++++++++++++++++++++ + +The name for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_NAME``. + +.. _dataverse.bagit.sourceorg.address: + +dataverse.bagit.sourceorg.address ++++++++++++++++++++++++++++++++++ + +The mailing address for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. The example in https://datatracker.ietf.org/doc/html/rfc8493 uses commas as separators: ``1 Main St., Cupertino, California, 11111``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_ADDRESS``. + +.. _dataverse.bagit.sourceorg.email: + +dataverse.bagit.sourceorg.email ++++++++++++++++++++++++++++++++ + +The email for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_EMAIL``. + .. _feature-flags: Feature Flags @@ -2632,8 +3128,8 @@ By default the footer says "Copyright © [YYYY]" but you can add text after the .. 
_:DoiProvider: -:DoiProvider -++++++++++++ +Legacy Single PID Provider: :DoiProvider +++++++++++++++++++++++++++++++++++++++++ As of this writing "DataCite" and "EZID" are the only valid options for production installations. Developers using Dataverse Software 4.10+ are welcome to use the keyword "FAKE" to configure a non-production installation with an @@ -2653,8 +3149,8 @@ JVM options: .. _:Protocol: -:Protocol -+++++++++ +Legacy Single PID Provider: :Protocol ++++++++++++++++++++++++++++++++++++++ As of this writing "doi","hdl", and "perma" are the only valid option for the protocol for a persistent ID. @@ -2662,8 +3158,8 @@ As of this writing "doi","hdl", and "perma" are the only valid option for the pr .. _:Authority: -:Authority -++++++++++ +Legacy Single PID Provider: :Authority +++++++++++++++++++++++++++++++++++++++ Use the authority assigned to you by your DoiProvider or HandleProvider, or your choice if using PermaLinks. @@ -2673,8 +3169,8 @@ Please note that a DOI or Handle authority cannot have a slash ("/") in it (slas .. _:Shoulder: -:Shoulder -+++++++++ +Legacy Single PID Provider: :Shoulder ++++++++++++++++++++++++++++++++++++++ The shoulder is used with DOIs and PermaLinks. Out of the box, the shoulder is set to "FK2/" but this is for testing only! When you apply for your DOI authority/namespace, you may have been assigned a shoulder. The following is only an example and a trailing slash is optional. @@ -2682,8 +3178,8 @@ The shoulder is used with DOIs and PermaLinks. Out of the box, the shoulder is s .. _:IdentifierGenerationStyle: -:IdentifierGenerationStyle -++++++++++++++++++++++++++ +Legacy Single PID Provider: :IdentifierGenerationStyle +++++++++++++++++++++++++++++++++++++++++++++++++++++++ By default, the Dataverse Software generates a random 6 character string, pre-pended by the Shoulder if set, to use as the identifier for a Dataset. @@ -2721,8 +3217,8 @@ more details. .. _:DataFilePIDFormat: -:DataFilePIDFormat -++++++++++++++++++ +Legacy Single PID Provider: :DataFilePIDFormat +++++++++++++++++++++++++++++++++++++++++++++++ This setting controls the way that the "identifier" component of a file's persistent identifier (PID) relates to the PID of its "parent" dataset. @@ -2810,11 +3306,10 @@ To enable setting file-level PIDs per collection:: When :AllowEnablingFilePIDsPerCollection is true, setting File PIDs to be enabled/disabled for a given collection can be done via the Native API - see :ref:`collection-attributes-api` in the Native API Guide. - .. _:IndependentHandleService: -:IndependentHandleService -+++++++++++++++++++++++++ +Legacy Single PID Provider: :IndependentHandleService ++++++++++++++++++++++++++++++++++++++++++++++++++++++ Specific for Handle PIDs. Set this setting to true if you want to use a Handle service which is setup to work 'independently' (No communication with the Global Handle Registry). By default this setting is absent and the Dataverse Software assumes it to be false. @@ -2823,8 +3318,8 @@ By default this setting is absent and the Dataverse Software assumes it to be fa .. _:HandleAuthHandle: -:HandleAuthHandle -+++++++++++++++++ +Legacy Single PID Provider: :HandleAuthHandle ++++++++++++++++++++++++++++++++++++++++++++++ Specific for Handle PIDs. Set this setting to / to be used on a global handle service when the public key is NOT stored in the default handle. By default this setting is absent and the Dataverse Software assumes it to be not set. If the public key for instance is stored in handle: 21.T12996/USER01. 
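+
+As a hedged example using the standard database settings API (substitute the handle that actually holds your public key, such as the one mentioned in the sentence above):
+
+``curl -X PUT -d 21.T12996/USER01 http://localhost:8080/api/admin/settings/:HandleAuthHandle``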
@@ -2921,7 +3416,7 @@ Note: by default, the URL is composed from the settings ``:GuidesBaseUrl`` and ` :GuidesBaseUrl ++++++++++++++ -Set ``:GuidesBaseUrl`` to override the default value "http://guides.dataverse.org". If you are interested in writing your own version of the guides, you may find the :doc:`/developers/documentation` section of the Developer Guide helpful. +Set ``:GuidesBaseUrl`` to override the default value "https://guides.dataverse.org". If you are interested in writing your own version of the guides, you may find the :doc:`/developers/documentation` section of the Developer Guide helpful. ``curl -X PUT -d http://dataverse.example.edu http://localhost:8080/api/admin/settings/:GuidesBaseUrl`` @@ -2942,14 +3437,14 @@ Set ``:NavbarSupportUrl`` to a fully-qualified URL which will be used for the "S Note that this will override the default behaviour for the "Support" menu option, which is to display the Dataverse collection 'feedback' dialog. -``curl -X PUT -d http://dataverse.example.edu/supportpage.html http://localhost:8080/api/admin/settings/:NavbarSupportUrl`` +``curl -X PUT -d https://dataverse.example.edu/supportpage.html http://localhost:8080/api/admin/settings/:NavbarSupportUrl`` :MetricsUrl +++++++++++ Make the metrics component on the root Dataverse collection a clickable link to a website where you present metrics on your Dataverse installation, perhaps one of the community-supported tools mentioned in the :doc:`/admin/reporting-tools-and-queries` section of the Admin Guide. -``curl -X PUT -d http://metrics.dataverse.example.edu http://localhost:8080/api/admin/settings/:MetricsUrl`` +``curl -X PUT -d https://metrics.dataverse.example.edu http://localhost:8080/api/admin/settings/:MetricsUrl`` .. _:MaxFileUploadSizeInBytes: @@ -2980,6 +3475,8 @@ This setting controls the number of files that can be uploaded through the UI at ``curl -X PUT -d 500 http://localhost:8080/api/admin/settings/:MultipleUploadFilesLimit`` +.. _:ZipDownloadLimit: + :ZipDownloadLimit +++++++++++++++++ @@ -3005,12 +3502,18 @@ You can override this global setting on a per-format basis for the following for - SAV - Rdata - CSV -- XLSX +- XLSX (in lower-case) + +For example : -For example, if you want your Dataverse installation to not attempt to ingest Rdata files larger than 1 MB, use this setting: +* if you want your Dataverse installation to not attempt to ingest Rdata files larger than 1 MB, use this setting: ``curl -X PUT -d 1000000 http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit:Rdata`` +* if you want your Dataverse installation to not attempt to ingest XLSX files at all, use this setting: + +``curl -X PUT -d 0 http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit:xlsx`` + :ZipUploadFilesLimit ++++++++++++++++++++ @@ -3043,6 +3546,21 @@ If ``:SolrFullTextIndexing`` is set to true, the content of files of any size wi ``curl -X PUT -d 314572800 http://localhost:8080/api/admin/settings/:SolrMaxFileSizeForFullTextIndexing`` + +.. _:DisableSolrFacets: + +:DisableSolrFacets +++++++++++++++++++ + +Setting this to ``true`` will make the collection ("dataverse") page start showing search results without the usual search facets on the left side of the page. A message will be shown in that column informing the users that facets are temporarily unavailable. Generating the facets is more resource-intensive for Solr than the main search results themselves, so applying this measure will significantly reduce the load on the search engine when its performance becomes an issue. 
+ +This setting can be used in combination with the "circuit breaker" mechanism on the Solr side (see the "Installing Solr" section of the Installation Prerequisites guide). An admin can choose to enable it, or even create an automated system for enabling it in response to Solr beginning to drop incoming requests with the HTTP code 503. + +To enable the setting:: + + curl -X PUT -d true "http://localhost:8080/api/admin/settings/:DisableSolrFacets" + + .. _:SignUpUrl: :SignUpUrl @@ -3909,24 +4427,9 @@ The URL of an LDN Inbox to which the LDN Announce workflow step will send messag The list of parent dataset field names for which the LDN Announce workflow step should send messages. See :doc:`/developers/workflows` for details. -.. _:GlobusBasicToken: - -:GlobusBasicToken -+++++++++++++++++ - -GlobusBasicToken encodes credentials for Globus integration. See :ref:`globus-support` for details. - -:GlobusEndpoint -+++++++++++++++ - -GlobusEndpoint is Globus endpoint id used with Globus integration. See :ref:`globus-support` for details. - -:GlobusStores -+++++++++++++ - -A comma-separated list of the S3 stores that are configured to support Globus integration. See :ref:`globus-support` for details. +.. _:GlobusSettings: -:GlobusAppURL +:GlobusAppUrl +++++++++++++ The URL where the `dataverse-globus `_ "transfer" app has been deployed to support Globus integration. See :ref:`globus-support` for details. @@ -3971,3 +4474,25 @@ A true/false (default) option determining whether the dataset datafile table dis .. _supported MicroProfile Config API source: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html + +.. _:UseStorageQuotas: + +:UseStorageQuotas ++++++++++++++++++ + +Enables storage use quotas in collections. See the :doc:`/api/native-api` for details. + + +.. _:StoreIngestedTabularFilesWithVarHeaders: + +:StoreIngestedTabularFilesWithVarHeaders +++++++++++++++++++++++++++++++++++++++++ + +With this setting enabled, tabular files produced during Ingest will +be stored with the list of variable names added as the first +tab-delimited line. As the most significant effect of this feature, +Access API will be able to take advantage of Direct Download for +tab. files saved with these headers on S3 - since they no longer have +to be generated and added to the streamed file on the fly. + +The setting is ``false`` by default, preserving the legacy behavior. diff --git a/doc/sphinx-guides/source/installation/installation-main.rst b/doc/sphinx-guides/source/installation/installation-main.rst index 021a97415e3..bc51a8e19f5 100755 --- a/doc/sphinx-guides/source/installation/installation-main.rst +++ b/doc/sphinx-guides/source/installation/installation-main.rst @@ -68,7 +68,7 @@ The script will prompt you for some configuration values. If this is a test/eval If desired, these default values can be configured by creating a ``default.config`` (example :download:`here <../../../../scripts/installer/default.config>`) file in the installer's working directory with new values (if this file isn't present, the above defaults will be used). -This allows the installer to be run in non-interactive mode (with ``./install -y -f > install.out 2> install.err``), which can allow for easier interaction with automated provisioning tools. +This allows the installer to be run in non-interactive mode (with ``./install.py -y -f > install.out 2> install.err``), which can allow for easier interaction with automated provisioning tools. 
All the Payara configuration tasks performed by the installer are isolated in the shell script ``dvinstall/as-setup.sh`` (as ``asadmin`` commands). @@ -100,7 +100,7 @@ The supplied site URL will be saved under the JVM option :ref:`dataverse.siteUrl The Dataverse Software uses JHOVE_ to help identify the file format (CSV, PNG, etc.) for files that users have uploaded. The installer places files called ``jhove.conf`` and ``jhoveConfig.xsd`` into the directory ``/usr/local/payara6/glassfish/domains/domain1/config`` by default and makes adjustments to the jhove.conf file based on the directory into which you chose to install Payara. -.. _JHOVE: http://jhove.openpreservation.org +.. _JHOVE: https://jhove.openpreservation.org Logging In ---------- @@ -120,7 +120,7 @@ Use the following credentials to log in: - username: dataverseAdmin - password: admin -Congratulations! You have a working Dataverse installation. Soon you'll be tweeting at `@dataverseorg `_ asking to be added to the map at http://dataverse.org :) +Congratulations! You have a working Dataverse installation. Soon you'll be tweeting at `@dataverseorg `_ asking to be added to the map at https://dataverse.org :) Trouble? See if you find an answer in the troubleshooting section below. @@ -204,7 +204,7 @@ Be sure you save the changes made here and then restart your Payara server to te UnknownHostException While Deploying ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If you are seeing "Caused by: java.net.UnknownHostException: myhost: Name or service not known" in server.log and your hostname is "myhost" the problem is likely that "myhost" doesn't appear in ``/etc/hosts``. See also http://stackoverflow.com/questions/21817809/glassfish-exception-during-deployment-project-with-stateful-ejb/21850873#21850873 +If you are seeing "Caused by: java.net.UnknownHostException: myhost: Name or service not known" in server.log and your hostname is "myhost" the problem is likely that "myhost" doesn't appear in ``/etc/hosts``. See also https://stackoverflow.com/questions/21817809/glassfish-exception-during-deployment-project-with-stateful-ejb/21850873#21850873 .. _fresh-reinstall: diff --git a/doc/sphinx-guides/source/installation/intro.rst b/doc/sphinx-guides/source/installation/intro.rst index 67fc774bdbd..6d77a1209b2 100644 --- a/doc/sphinx-guides/source/installation/intro.rst +++ b/doc/sphinx-guides/source/installation/intro.rst @@ -2,7 +2,7 @@ Introduction ============ -Welcome! Thanks for installing `The Dataverse Project `_! +Welcome! Thanks for installing `The Dataverse Project `_! .. contents:: |toctitle| :local: @@ -36,7 +36,7 @@ Getting Help To get help installing or configuring a Dataverse installation, please try one or more of: - posting to the `dataverse-community `_ Google Group. -- asking at http://chat.dataverse.org +- asking at https://chat.dataverse.org - emailing support@dataverse.org to open a private ticket at https://help.hmdc.harvard.edu Information to Send to Support When Installation Fails diff --git a/doc/sphinx-guides/source/installation/oauth2.rst b/doc/sphinx-guides/source/installation/oauth2.rst index 8dffde87cc2..7a0e938b572 100644 --- a/doc/sphinx-guides/source/installation/oauth2.rst +++ b/doc/sphinx-guides/source/installation/oauth2.rst @@ -11,7 +11,7 @@ As explained under "Auth Modes" in the :doc:`config` section, OAuth2 is one of t `OAuth2 `_ is an authentication protocol that allows systems to share user data, while letting the users control what data is being shared. 
When you see buttons stating "login with Google" or "login through Facebook", OAuth2 is probably involved. For the purposes of this section, we will shorten "OAuth2" to just "OAuth." OAuth can be compared and contrasted with :doc:`shibboleth`. -The Dataverse Software supports four OAuth providers: `ORCID `_, `Microsoft Azure Active Directory (AD) `_, `GitHub `_, and `Google `_. +The Dataverse Software supports four OAuth providers: `ORCID `_, `Microsoft Azure Active Directory (AD) `_, `GitHub `_, and `Google `_. In addition :doc:`oidc` are supported, using a standard based on OAuth2. diff --git a/doc/sphinx-guides/source/installation/oidc.rst b/doc/sphinx-guides/source/installation/oidc.rst index 1fdfcce63b5..d132fd2953d 100644 --- a/doc/sphinx-guides/source/installation/oidc.rst +++ b/doc/sphinx-guides/source/installation/oidc.rst @@ -16,7 +16,7 @@ Being a standard, you can easily enable the use of any OpenID connect compliant Some prominent provider examples: - `Google `_ -- `Microsoft Azure AD `_ +- `Microsoft Azure AD `_ - `Yahoo `_ - ORCID `announced support `_ @@ -26,7 +26,7 @@ You can also either host an OpenID Connect identity management on your own or us - `Keycloak `_ is an open source solution for an IDM/IAM - `Unity IDM `_ is another open source IDM/IAM solution -Other use cases and combinations +Other Use Cases and Combinations -------------------------------- - Using your custom identity management solution might be a workaround when you seek for LDAP support, but @@ -41,7 +41,7 @@ Other use cases and combinations - In the future, OpenID Connect might become a successor to the large scale R&E SAML federations we have nowadays. See also `OpenID Connect Federation Standard `_ (in development) -How to use +How to Use ---------- Just like with :doc:`oauth2` you need to obtain a *Client ID* and a *Client Secret* from your provider(s). @@ -51,7 +51,7 @@ Just like with :doc:`oauth2` you need to obtain a *Client ID* and a *Client Secr You need to apply for credentials out-of-band. The Dataverse installation will discover all necessary metadata for a given provider on its own (this is `part of the standard -`_). +`_). To enable this, you need to specify an *Issuer URL* when creating the configuration for your provider (see below). @@ -59,18 +59,61 @@ Finding the issuer URL is best done by searching for terms like "discovery" in t The discovery document is always located at ``/.well-known/openid-configuration`` (standardized). To be sure, you can always lookup the ``issuer`` value inside the live JSON-based discovery document. -Please create a my-oidc-provider.json file like this, replacing every ``<...>`` with your values: +Note if you work with Keycloak, make sure the base URL is in the following format: ``https://host:port/realms/{realm}`` +where ``{realm}`` has to be replaced by the name of the Keycloak realm. -.. code-block:: json +After adding a provider, the Log In page will by default show the "builtin" provider, but you can adjust this via the +``:DefaultAuthProvider`` configuration option. For details, see :doc:`config`. - { - "id":"", - "factoryAlias":"oidc", - "title":"", - "subtitle":"", - "factoryData":"type: oidc | issuer: | clientId: | clientSecret: ", - "enabled":true - } +.. hint:: + In contrast to our :doc:`oauth2`, you can use multiple providers by creating distinct configurations enabled by + the same technology and without modifying the Dataverse Software code base (standards for the win!). + + +.. 
_oidc-pkce: + +Enabling PKCE Security +^^^^^^^^^^^^^^^^^^^^^^ + +Many providers these days support or even require the usage of `PKCE `_ to safeguard against +some attacks and enable public clients that cannot have a secure secret to still use OpenID Connect (or OAuth2). + +The Dataverse-built OIDC client can be configured to use PKCE and the method to use when creating the code challenge can be specified. +See also `this explanation of the flow `_ +for details on how this works. + +As we are using the `Nimbus SDK `_ as our client +library, we support the standard ``PLAIN`` and ``S256`` (SHA-256) code challenge methods. "SHA-256 method" is the default +as recommend in `RFC7636 `_. If your provider needs some +other method, please open an issue. + +The provisioning sections below contain in the example the parameters you may use to configure PKCE. + +Provision a Provider +-------------------- + +Depending on your use case, you can choose different ways to setup one or multiple OIDC identity providers. + +Using :ref:`jvm-options` has the advantage of being consistent and does not require additional calls to the API. +It can only configure one provider though, yet you can mix with other provider definitions via API. + +Using the REST API has the advantage of provisioning multiple, different OIDC providers. +Depending on your use case, it has the drawback of needing additional API calls. + +If you only need one single provider in your installation and it is using OIDC, use the JVM options, as it +requires fewer extra steps and allows you to keep more configuration in a single source. + +Provision via REST API +^^^^^^^^^^^^^^^^^^^^^^ + +Note: you may omit the PKCE related settings from ``factoryData`` below if you don't plan on using PKCE - default is +disabled. + +Please create a :download:`my-oidc-provider.json <../_static/installation/files/root/auth-providers/oidc.json>` file, replacing every ``<...>`` with your values: + +.. literalinclude:: /_static/installation/files/root/auth-providers/oidc.json + :name: oidc-provider-example + :language: json Now load the configuration into your Dataverse installation using the same API as with :doc:`oauth2`: @@ -80,9 +123,68 @@ The Dataverse installation will automatically try to load the provider and retri You should see the new provider under "Other options" on the Log In page, as described in the :doc:`/user/account` section of the User Guide. -By default, the Log In page will show the "builtin" provider, but you can adjust this via the ``:DefaultAuthProvider`` -configuration option. For details, see :doc:`config`. - -.. hint:: - In contrast to our :doc:`oauth2`, you can use multiple providers by creating distinct configurations enabled by - the same technology and without modifying the Dataverse Software code base (standards for the win!). +.. _oidc-mpconfig: + +Provision via JVM Options +^^^^^^^^^^^^^^^^^^^^^^^^^ + +A single provider may be provisioned using :ref:`jvm-options`. +It may be accompanied by more providers configured via REST API. +Note that this provider will only be deployed at startup time and (currently) cannot be reconfigured without a restart. + +All options below may be set via *MicroProfile Config API* sources. Examples: use environment variable +``DATAVERSE_AUTH_OIDC_ENABLED`` for the ``dataverse.auth.oidc.enabled`` option or ``DATAVERSE_AUTH_OIDC_CLIENT_ID`` +for the ``dataverse.auth.oidc.client-id`` option. + +The following options are available: + +.. 
list-table:: + :widths: 25 55 10 10 + :header-rows: 1 + :align: left + + * - Option + - Description + - Mandatory + - Default + * - ``dataverse.auth.oidc.enabled`` + - Enable or disable provisioning the provider via MicroProfile. + - N + - ``false`` + * - ``dataverse.auth.oidc.client-id`` + - The client-id of the application to identify it at your provider. + - Y + - \- + * - ``dataverse.auth.oidc.client-secret`` + - A confidential secret to authorize application requests to the provider as legit. + - N + - \- + * - ``dataverse.auth.oidc.auth-server-url`` + - The base URL of the OpenID Connect (OIDC) server as explained above. + - Y + - \- + * - ``dataverse.auth.oidc.pkce.enabled`` + - Set to ``true`` to enable :ref:`PKCE ` in auth flow. + - N + - ``false`` + * - ``dataverse.auth.oidc.pkce.method`` + - Set code challenge method. The default value is the current best practice in the literature. + - N + - ``S256`` + * - ``dataverse.auth.oidc.title`` + - The UI visible name for this provider in login options. + - N + - ``OpenID Connect`` + * - ``dataverse.auth.oidc.subtitle`` + - A subtitle, currently not displayed by the UI. + - N + - ``OpenID Connect`` + * - ``dataverse.auth.oidc.pkce.max-cache-size`` + - Tune the maximum size of all OIDC providers' verifier cache (the number of outstanding PKCE-enabled auth responses). + - N + - 10000 + * - ``dataverse.auth.oidc.pkce.max-cache-age`` + - Tune the maximum age, in seconds, of all OIDC providers' verifier cache entries. Default is 5 minutes, equivalent to lifetime + of many OIDC access tokens. + - N + - 300 \ No newline at end of file diff --git a/doc/sphinx-guides/source/installation/prerequisites.rst b/doc/sphinx-guides/source/installation/prerequisites.rst index 1847f1b8f63..a56f4811ace 100644 --- a/doc/sphinx-guides/source/installation/prerequisites.rst +++ b/doc/sphinx-guides/source/installation/prerequisites.rst @@ -26,7 +26,7 @@ Installing Java The Dataverse Software should run fine with only the Java Runtime Environment (JRE) installed, but installing the Java Development Kit (JDK) is recommended so that useful tools for troubleshooting production environments are available. We recommend using Oracle JDK or OpenJDK. -The Oracle JDK can be downloaded from http://www.oracle.com/technetwork/java/javase/downloads/index.html +The Oracle JDK can be downloaded from https://www.oracle.com/technetwork/java/javase/downloads/index.html On a RHEL/derivative, install OpenJDK (devel version) using yum:: @@ -211,6 +211,25 @@ Finally, you need to tell Solr to create the core "collection1" on startup:: echo "name=collection1" > /usr/local/solr/solr-9.3.0/server/solr/collection1/core.properties +Dataverse collection ("dataverse") page uses Solr very heavily. On a busy instance this may cause the search engine to become the performance bottleneck, making these pages take increasingly longer to load, potentially affecting the overall performance of the application and/or causing Solr itself to crash. If this is observed on your instance, we recommend uncommenting the following lines in the ```` section of the ``solrconfig.xml`` file:: + + true + 75 + +and:: + + true + 75 + +This will activate Solr "circuit breaker" mechanisms that make it start dropping incoming requests with the HTTP code 503 when it starts experiencing load issues. 
As of Dataverse 6.1, the collection page will recognize this condition and display a customizeable message to the users informing them that the search engine is unavailable because of heavy load, with the assumption that the condition is transitive and suggesting that they try again later. This is still an inconvenience to the users, but still a more graceful handling of the problem, rather than letting the pages time out or causing crashes. You may need to experiment and adjust the threshold values defined in the lines above. + +If this becomes a common issue, another temporary workaround an admin may choose to use is to enable the following setting:: + + curl -X PUT -d true "http://localhost:8080/api/admin/settings/:DisableSolrFacets" + +This will make the collection page show the search results without the usual search facets on the left side of the page. Another customizeable message will be shown in that column informing the users that facets are temporarily unavailable. Generating these facets is more resource-intensive for Solr than the main search results themselves, so applying this measure will significantly reduce the load on the search engine. + + Solr Init Script ================ @@ -233,11 +252,9 @@ For systems using init.d (like CentOS 6), download this :download:`Solr init scr Securing Solr ============= -Our sample init script and systemd service file linked above tell Solr to only listen on localhost (127.0.0.1). We strongly recommend that you also use a firewall to block access to the Solr port (8983) from outside networks, for added redundancy. - -It is **very important** not to allow direct access to the Solr API from outside networks! Otherwise, any host that can reach the Solr port (8983 by default) can add or delete data, search unpublished data, and even reconfigure Solr. For more information, please see https://lucene.apache.org/solr/guide/7_3/securing-solr.html. A particularly serious security issue that has been identified recently allows a potential intruder to remotely execute arbitrary code on the system. See `RCE in Solr via Velocity Template `_ for more information. +As of version 9.3.0, Solr listens solely on localhost for security reasons. If your installation will run Solr on its own host, you will need to edit ``bin/solr.in.sh``, setting ``JETTY_HOST`` to the external IP address of your Solr server to tell Solr to accept external connections. -If you're running your Dataverse installation across multiple service hosts you'll want to remove the jetty.host argument (``-j jetty.host=127.0.0.1``) from the startup command line, but make sure Solr is behind a firewall and only accessible by the Dataverse installation host(s), by specific ip address(es). +We strongly recommend that you also use a firewall to block access to the Solr port (8983) from outside networks. It is **very important** not to allow direct access to the Solr API from outside networks! Otherwise, any host that can reach Solr can add or delete data, search unpublished data, and even reconfigure Solr. 
For more information, please see https://solr.apache.org/guide/solr/latest/deployment-guide/securing-solr.html We additionally recommend that the Solr service account's shell be disabled, as it isn't necessary for daily operation:: @@ -267,7 +284,7 @@ Installing jq or you may install it manually:: # cd /usr/bin - # wget http://stedolan.github.io/jq/download/linux64/jq + # wget https://stedolan.github.io/jq/download/linux64/jq # chmod +x jq # jq --version diff --git a/doc/sphinx-guides/source/installation/shibboleth.rst b/doc/sphinx-guides/source/installation/shibboleth.rst index cd0fbda77a6..9f7c04c1534 100644 --- a/doc/sphinx-guides/source/installation/shibboleth.rst +++ b/doc/sphinx-guides/source/installation/shibboleth.rst @@ -76,7 +76,7 @@ A ``jk-connector`` network listener should have already been set up when you ran You can verify this with ``./asadmin list-network-listeners``. -This enables the `AJP protocol `_ used in Apache configuration files below. +This enables the `AJP protocol `_ used in Apache configuration files below. SSLEngine Warning Workaround ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -93,7 +93,7 @@ Configure Apache Enforce HTTPS ~~~~~~~~~~~~~ -To prevent attacks such as `FireSheep `_, HTTPS should be enforced. https://wiki.apache.org/httpd/RewriteHTTPToHTTPS provides a good method. You **could** copy and paste that those "rewrite rule" lines into Apache's main config file at ``/etc/httpd/conf/httpd.conf`` but using Apache's "virtual hosts" feature is recommended so that you can leave the main configuration file alone and drop a host-specific file into place. +To prevent attacks such as `FireSheep `_, HTTPS should be enforced. https://wiki.apache.org/httpd/RewriteHTTPToHTTPS provides a good method. You **could** copy and paste that those "rewrite rule" lines into Apache's main config file at ``/etc/httpd/conf/httpd.conf`` but using Apache's "virtual hosts" feature is recommended so that you can leave the main configuration file alone and drop a host-specific file into place. Below is an example of how "rewrite rule" lines look within a ``VirtualHost`` block. Download a :download:`sample file <../_static/installation/files/etc/httpd/conf.d/dataverse.example.edu.conf>` , edit it to substitute your own hostname under ``ServerName``, and place it at ``/etc/httpd/conf.d/dataverse.example.edu.conf`` or a filename that matches your hostname. The file must be in ``/etc/httpd/conf.d`` and must end in ".conf" to be included in Apache's configuration. @@ -235,7 +235,7 @@ Run semodule Silent is golden. No output is expected. This will place a file in ``/etc/selinux/targeted/modules/active/modules/shibboleth.pp`` and include "shibboleth" in the output of ``semodule -l``. See the ``semodule`` man page if you ever want to remove or disable the module you just added. -Congrats! You've made the creator of http://stopdisablingselinux.com proud. :) +Congrats! You've made the creator of https://stopdisablingselinux.com proud. :) Restart Apache and Shibboleth ----------------------------- @@ -408,6 +408,8 @@ Rather than looking up the user's id in the ``authenticateduser`` database table Per above, you now need to tell the user to use the password reset feature to set a password for their local account. +.. 
_shib-groups: + Institution-Wide Shibboleth Groups ---------------------------------- diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md new file mode 100644 index 00000000000..f16cd1d38fc --- /dev/null +++ b/doc/sphinx-guides/source/qa/index.md @@ -0,0 +1,10 @@ +# QA Guide + +```{toctree} +overview.md +testing-approach.md +testing-infrastructure.md +qa-workflow.md +test-automation.md +performance-tests.md +``` diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md new file mode 100644 index 00000000000..60e6a28ee9a --- /dev/null +++ b/doc/sphinx-guides/source/qa/overview.md @@ -0,0 +1,63 @@ +# Overview + +```{contents} Contents: +:local: +:depth: 3 +``` + +## Introduction + +This guide describes the testing process used by QA at IQSS and provides a reference for others filling in for that role. Please note that many variations are possible, and the main thing is to catch bugs and provide a good quality product to the user community. + +## Workflow + +Here is a brief description of our workflow: + +### Issue Submission and Prioritization: +- Members of the community or the development team submit bugs or request features through GitHub as [Issues](https://github.com/IQSS/dataverse/issues). +- These Issues are prioritized and added to a two-week-long sprint that can be tracked on the {ref}`kanban-board`. + +### Development Process: +- Developers will work on a solution on a separate branch. +- Once a developer completes their work, they submit a [Pull Request](https://github.com/IQSS/dataverse/pulls) (PR). +- The PR is reviewed by a developer from the team. +- During the review, the reviewer may suggest coding or documentation changes to the original developer. + +### Quality Assurance (QA) Testing: +- The QA tester performs a smoke test of core functionality and regression testing. +- Documentation is used to understand the feature and validate any assertions made. +- If no documentation is provided in the PR, the tester may refer to the original bug report to determine the desired outcome of the changes. +- Once the branch is assumed to be safe, it is merged into the develop branch. + +### Final Steps: +- The PR and the Issue are closed and assigned the “merged” status. +- It is good practice to delete the branch if it is local. +- The content from the PR becomes part of the codebase for {doc}`future releases `. + +The complete suggested workflow can be found at {doc}`qa-workflow`. + +## Tips and Tricks + +- Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (on Google Drive). This will potentially help with future testing. +- When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. +- Always tail the server.log file while testing. Open a terminal window to the test instance and `tail -F server.log`. This helps you get a real-time sense of what the server is doing when you interact with the application and makes it easier to identify any stack trace on failure.
+- When overloaded, QA the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. +- When testing a bug fix, try reproducing the bug on the demo server before testing the fix. That way you know you are taking the correct steps to verify that the fix worked. +- When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. +- Back up your DB before applying an irreversible DB update when you are using a persistent/reusable platform. Just in case it fails and you need to carry on testing something else, you can use the backup. + +## Release Cadence and Sprints + +A release likely spans multiple two-week sprints. Each sprint represents the priorities for that time and is sized so that the team can reasonably complete most of the work on time. This is a goal to help with planning; it is not a strict requirement. Some issues from the previous sprint may remain and likely be included in the next sprint but occasionally may be deprioritized and deferred to another time. + +The decision to make a release can be based on the time since the last release, some important feature needed by the community or contractual deadline, or some other logical reason to package the work completed into a named release and post it to the releases section on GitHub. + +## Test API + +The API test suite is added to and maintained by development. (See {doc}`/developers/testing` in the Developer Guide.) It is generally advisable for code contributors to add API tests when adding new functionality. The approach here is one of code coverage: exercise as many of the code base's code paths as possible, every time, to catch bugs. + +This type of approach is often used to give contributing developers confidence that their code didn’t introduce any obvious, major issues and is run on each commit. Since it is a broad set of tests, it is not clear whether any specific, conceivable test is run but it does add a lot of confidence that the code base is functioning due to its reach and consistency. (See {doc}`/qa/test-automation` in this guide.) + +## Making a Release + +See {doc}`/developers/making-releases` in the Developer Guide. diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md new file mode 100644 index 00000000000..404188735a2 --- /dev/null +++ b/doc/sphinx-guides/source/qa/performance-tests.md @@ -0,0 +1,31 @@ +# Performance Testing + +```{contents} Contents: +:local: +:depth: 3 +``` + +## Introduction + +The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming, it is done once near the end. Using a load-generating tool named {ref}`Locust `, our scripts load the statistically most-loaded pages (according to Google Analytics): 50% homepage and 50% some type of dataset page. + +Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level.
Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds, it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product, and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page. + +## Testing Environment + +To run performance tests, we have a performance test cluster on AWS that employs web, database, and Solr. The database contains a copy of production that is updated weekly on Sundays. To ensure the homepage content is consistent between test runs across releases, two scripts set the datasets that will appear on the homepage. There is one script on the web server and one on the database server, both in the default CentOS user dir. Run these scripts before conducting the tests. + +Once the performance has been tested and recorded in a [Google spreadsheet](https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit?usp=sharing) for this proposed version, the release will be prepared and posted. + +## Access + +Access to performance cluster instances requires ssh keys. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the default CentOS user dir. + +## Special Notes ⚠️ + +Please note the performance database is also used occasionally by members of the Curation team to generate prod reports, so a courtesy check with them would be good before taking over the env. + + +## Executing the Performance Script +To execute the performance test script, you need to install a local copy of the database-helper-scripts project at . We have since produced a stripped-down script that calls just the collection and dataset pages and works with Python 3. diff --git a/doc/sphinx-guides/source/qa/qa-workflow.md b/doc/sphinx-guides/source/qa/qa-workflow.md new file mode 100644 index 00000000000..9915fe97d98 --- /dev/null +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -0,0 +1,100 @@ +# QA Workflow for Pull Requests + +```{contents} Contents: +:local: +:depth: 3 +``` +## Checklist + +1. Assign the PR you are working on to yourself. + +1. What does it do? + + Read the description at the top of the PR, any release notes, documentation, and the original issue. + +1. Does it address the issue it closes? + + The PR should address the issue entirely unless otherwise noted. + +1. How do you test it? + + Look at the "how to test" section at the top of the pull request. Does it make sense? This likely won’t be the only testing you perform. You can develop further tests from the original issue or problem description, from the description of functionality, the documentation, configuration, and release notes. Also consider trying to reveal bugs by trying to break it: try bad or missing data, very large values or volume of data, exceed any place that may have a limit or boundary. + +1. Does it have or need documentation? + + Small changes or fixes usually don’t have docs but new features or extensions of a feature or new configuration options should have documentation. + +1. Does it have or need a release note snippet? + + Same as for docs: just a heads up to an admin about something of note, especially upgrade instructions as needed.
See also {ref}`writing-release-note-snippets` for what to expect in a release note snippet. + +1. Does it include a database migration script (Flyway)? + + First, check the numbering in the filename of the script. It must be in line with the rules defined at {ref}`create-sql-script`. If the number is out of date (very common for older pull requests), do not merge and ask the developer to rename the script. Otherwise, deployment will fail. + + Once you're sure the numbering is ok (the next available number, basically), back up your database and proceed with testing. + +1. Validate the documentation. + + Build the doc using Jenkins or read the automated Read the Docs preview. Does it build without errors? + Read it through for sense. + Use it for test cases and to understand the feature. + +1. Build and deploy the pull request. + + Normally this is done using Jenkins and automatically deployed to the QA test machine. See {ref}`deploy-to-internal`. + +1. Configure if required + + If configuration is needed for the feature to operate and everyone installing or upgrading will use it, configure it now, as all testing will use it. + +1. Smoke test the branch. + + Standard, minimal test of core functionality. + +1. Regression test related or potentially affected features + + If config is optional and you are testing with the config turned off, do some spot checks/regression tests of related or potentially affected areas. + +1. Configure if optional + + What is the default, enabled or disabled? Is that clearly indicated? Test both. + By config here we mean enabling the functionality versus choosing a particular config option. Some complex features have config options in addition to enabling. Those will also need to be tested. + +1. Test all the new or changed functionality. + + The heart of the PR, what is this PR adding or fixing? Is it all there and working? + +1. Regression test related or potentially affected features. + + Sometimes new stuff modifies and extends other functionality or functionality that is shared with other aspects of the system, e.g. export, import. Check the underlying functionality that was also modified but in a spot check or briefer manner. + +1. Report any issues found within the PR + + It can be easy to lose track of what you’ve found, steps to reproduce, and any errors or stack traces from the server log. Add these in a numbered list to a comment in the PR. That makes them easier to check off when fixed and to work on. Add large amounts of text, such as the server log, as attached, meaningfully named files. + +1. Retest all fixes, spot check feature functionality, smoke test + + Similar to your initial testing, only narrower. + +1. Test upgrade instructions, if required + + Some features build upon the existing architecture but require modifications, such as adding a new column to the DB or changing or adding data. It is crucial that this works properly for our 100+ installations. This testing should be performed at the least on the prior version with basic data objects (collection, dataset, files) and any other data that will be updated by this feature. Using the sample data from the prior version would be good, or deploying to dataverse-internal and upgrading there would be a good test. Remember to back up your DB before doing a transformative upgrade so that you can repeat it later if you find a bug. + +1. Make sure the API tests in the PR have been completed and passed. + + They are run with each commit to the PR and take approximately 42 minutes to run. + +1.
Merge PR + + Click the "Merge pull request" button and be sure to use the "Create a merge commit" option to include this PR into the common develop branch. + + Some of the reasons why we encourage using this option over Rebase or Squash are: + + - Preservation of commit history + - Clearer context and traceability + - Easier collaboration, bug tracking, and reverting + +1. Delete merged branch + + Just a housekeeping move if the PR is from IQSS. Click the delete branch button where the merge button had been. There is no deletion for outside contributions. diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md new file mode 100644 index 00000000000..708d0f88e23 --- /dev/null +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -0,0 +1,62 @@ +# Test Automation +```{contents} Contents: +:local: +:depth: 3 +``` + +## Jenkins + +Jenkins is our primary tool for knowing if our API tests are passing. (Unit tests are executed locally by developers.) + +You can find our Jenkins installation at . + +Please note that while it has been open to the public in the past, it is currently firewalled off. We can poke a hole in the firewall for your IP address if necessary. Please get in touch. (You might also be interested in which is about restoring the ability of contributors to see if their pull requests are passing API tests or not.) + +### Jenkins Jobs + +Jenkins is organized into jobs. We'll highlight a few. + +#### IQSS-dataverse-develop + +, which we will refer to as the "develop" job, runs after pull requests are merged. It is crucial that this job stays green (passing) because we always want to stay in a "release ready" state. If you notice that this job is failing, make noise about it! + +You can access this job from the README at . + +#### IQSS-Dataverse-Develop-PR + + can be thought of as "PR jobs". It's a collection of jobs run on pull requests. Typically, you will navigate directly into the job (and its particular build number) from a pull request. For example, from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). + +#### guides.dataverse.org + + is what we use to build guides. See {ref}`build-guides` in the Developer Guide for how this job is used at release time. + +### Checking if API Tests are Passing on Jenkins + +If API tests are failing, you should not merge the pull request. + +How can you know if API tests are passing? Here are the steps, by way of example. + +- From the pull request, navigate to the build. For example from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). +- You are now on the new "blue" interface for Jenkins. Click the button with an arrow on the right side of the header called "go to classic" which should take you to (for example) . +- Click "Test Result". +- Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. +- Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error. + +### Diagnosing Failures on Jenkins + +API test failures can have multiple causes. As described above, from the "Test Result" page, you might see the failure under "All Failed Tests". However, the test could have failed because of some underlying system issue.
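If you prefer the command line, one quick way to tell these cases apart is to ask Jenkins for the build's test counts via its JSON API. This is only a sketch, assuming the standard Jenkins JSON API is reachable from your location (see the firewall note above); the host, job path, and build number below are placeholders based on the example job names in this section.

```
# Placeholders: substitute the Jenkins host, job path, and build number from the pull request check.
BUILD_URL="https://jenkins.example.edu/job/IQSS-Dataverse-Develop-PR/job/PR-1234/10"

# If this returns counts, tests ran; a 404 or empty test report usually means the API tests never ran at all.
curl -s "${BUILD_URL}/testReport/api/json?tree=failCount,passCount,skipCount"
```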
+ +If you have determined that the API tests have not run at all, your next step should be to click on "Console Output". For example, . Click "Full log" to see the full log in the browser or navigate to (for example) to get a plain text version. + +Go to the end of the log and then scroll up, looking for the failure. A failed Ansible task can look like this: + +``` +TASK [dataverse : download payara zip] ***************************************** +fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "elapsed": 10, "msg": "Request failed: ", "url": "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip"} +``` + +In the example above, if Payara can't be downloaded, we're obviously going to have problems deploying Dataverse to it! + +## GitHub Actions + +We also use GitHub Actions. See for a list of actions. diff --git a/doc/sphinx-guides/source/qa/testing-approach.md b/doc/sphinx-guides/source/qa/testing-approach.md new file mode 100644 index 00000000000..817161d02a0 --- /dev/null +++ b/doc/sphinx-guides/source/qa/testing-approach.md @@ -0,0 +1,50 @@ +# Testing Approach + +```{contents} Contents: +:local: +:depth: 3 +``` +## Introduction + +We use a risk-based, manual testing approach to achieve the most benefit with limited resources. This means we want to catch bugs where they are likely to exist, ensure core functions work, and failures do not have catastrophic results. In practice this means we do a brief positive check of core functions on each build called a smoke test, we test the most likely place for new bugs to exist, the area where things have changed, and attempt to prevent catastrophic failure by asking about the scope and reach of the code and how failures may occur. + +If it seems possible through user error or some other occurrence that such a serious failure will occur, we try to make it happen in the test environment. If the code has a UI component, we also do a limited amount of browser compatibility testing using Chrome, Firefox, and Safari browsers. We do not currently do UX or accessibility testing on a regular basis, though both have been done product-wide by a Design group (in the past) and by the community. + +## Examining a Pull Request for Test Cases + +### What Problem Does It Solve? + +Read the top part of the pull request for a description, notes for reviewers, and usually a "how to test" section. Does it make sense? If not, read the underlying issue it closes and any release notes or documentation. Knowing in general what it does helps you to think about how to approach it. + +### How is It Configured? + +Most pull requests do not have any special configuration and are enabled on deployment, but some do. Configuration is part of testing. A sysadmin or superuser will need to follow these instructions so make sure they are in the release note snippet and try them out. Plus, that is the only way you will get it working to test it! + +Identify test cases by examining the problem report or feature description and any documentation of functionality. Look for statements or assertions about functions, what it does, as well as conditions or conditional behavior. These become your test cases. Think about how someone might make a mistake using it and try it. Does it fail gracefully or in a confusing, or worse, damaging manner? Also, consider whether this pull request may interact with other functionality and try some spot checks there. 
For instance, if new metadata fields have been added, try the export feature. Of course, try the suggestions under "how to test." Those may be sufficient, but you should always think about the pull request based on what it does. + +Try adding, modifying, and deleting any objects involved. This is probably covered by using the feature, but this is a good basic approach to keep in mind. + +Make sure any server logging is appropriate. You should tail the server log while running your tests. Watch for unreported errors or stack traces, as well as especially chatty logging. If you do find a bug you will need to report the stack trace from the server.log. Err on the side of providing the developer too much of the server.log rather than too little. + +Exercise the UI if there is one. We tend to use Chrome for most of our basic testing as it's used twice as much as the next most commonly-used browser, according to our site's Google Analytics. First go through all the options in the UI. Then, if all works, spot-check using Firefox and Safari. + +Check permissions. Is this feature limited to a specific set of users? Can it be accessed by a guest or by a non-privileged user? How about pasting a privileged page URL into a non-privileged user’s browser? + +Think about risk. Is the feature or function part of a critical area such as permissions? Does the functionality modify data? You may do more testing when the risk is higher. + +## Smoke Test + +1. Go to the homepage on . Scroll to the bottom to ensure the build number is the one you intend to test from Jenkins. +1. Create a new user: It's fine to use a formulaic name with your initials and date and make the username and password the same, e.g. kc080622. +1. Create a dataverse: You can use the same username. +1. Create a dataset: You can use the same username; fill in the required fields (do not use a template). +1. Upload 3 different types of files: You can use a tabular file, 50by1000.dta, an image file, and a text file. +1. Publish the dataset. +1. Download a file. + + +## Alternative Deployment and Testing + +This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. If a developer is testing, they would likely just deploy to their dev environment. That might be ok, but is the env fully configured enough to offer a real-world testing scenario? + +An alternative might be to spin up an EC2 instance from the branch on AWS, potentially using sample data. This can take some time, so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.md b/doc/sphinx-guides/source/qa/testing-infrastructure.md new file mode 100644 index 00000000000..804e4c0afe6 --- /dev/null +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -0,0 +1,46 @@ +# Infrastructure for Testing + +```{contents} Contents: +:local: +:depth: 3 +``` + +## Dataverse Internal + +To build and test a PR, we use a job called `IQSS_Dataverse_Internal` on (see {doc}`test-automation`), which deploys the .war file to an AWS instance named . + +(deploy-to-internal)= +### Building and Deploying a Pull Request from Jenkins to Dataverse-Internal + +1.
Go to the QA column on our [project board](https://github.com/orgs/IQSS/projects/34), and select a pull request to test. + +1. From the pull request page, click the copy icon next to the pull request branch name. + +1. Log on to , select the `IQSS_Dataverse_Internal` project, and configure the repository URL and branch specifier to match the ones from the pull request. For example: + + * 8372-gdcc-xoai-library has IQSS implied + - **Repository URL:** https://github.com/IQSS/dataverse.git + - **Branch specifier:** */8372-gdcc-xoai-library + * GlobalDataverseCommunityConsortium:GDCC/DC-3B + - **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git + - **Branch specifier:** */GDCC/DC-3B. + +1. Click "Build Now" and note the build number in progress. + +1. Once complete, go to and check that the deployment succeeded, and that the homepage displays the latest build number. + +1. If for some reason it didn't deploy, check the server.log file. It may just be a caching issue, so try un-deploying, deleting cache, restarting, and re-deploying on the server (`su - dataverse` then `/usr/local/payara6/bin/asadmin list-applications; /usr/local/payara6/bin/asadmin undeploy dataverse-6.1; /usr/local/payara6/bin/asadmin deploy /tmp/dataverse-6.1.war`) + +1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. In the case of a collision, ask the developer to rename the script. + +1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for reporting. + +## Guides Server + +There is also a guides job called `guides.dataverse.org` (see {doc}`test-automation`). Any test builds of guides are deployed to a named directory on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. + +Note that changes to guides can also be previewed on Read the Docs. In the pull request, look for a link like . This Read the Docs preview is also mentioned under {doc}`/developers/documentation`. + +## Other Servers + +We can spin up additional AWS EC2 instances as needed. See {doc}`/developers/deployment` in the Developer Guide for the scripts we use. diff --git a/doc/sphinx-guides/source/style/foundations.rst b/doc/sphinx-guides/source/style/foundations.rst index 31e0c314a05..cc193666868 100755 --- a/doc/sphinx-guides/source/style/foundations.rst +++ b/doc/sphinx-guides/source/style/foundations.rst @@ -9,7 +9,7 @@ Foundation elements are the very basic building blocks to create a page in Datav Grid Layout =========== -`Bootstrap `__ provides a responsive, fluid, 12-column grid system that we use to organize our page layouts. +`Bootstrap `__ provides a responsive, fluid, 12-column grid system that we use to organize our page layouts. We use the fixed-width ``.container`` class which provides responsive widths (i.e. auto, 750px, 970px or 1170px) based on media queries for the page layout, with a series of rows and columns for the content.
@@ -42,7 +42,7 @@ The grid layout uses ``.col-sm-*`` classes for horizontal groups of columns, ins Typography ========== -The typeface, text size, and line-height are set in the `Bootstrap CSS `__. We use Bootstrap's global default ``font-size`` of **14px**, with a ``line-height`` of **1.428**, which is applied to the ```` and all paragraphs. +The typeface, text size, and line-height are set in the `Bootstrap CSS `__. We use Bootstrap's global default ``font-size`` of **14px**, with a ``line-height`` of **1.428**, which is applied to the ```` and all paragraphs. .. code-block:: css @@ -57,7 +57,7 @@ The typeface, text size, and line-height are set in the `Bootstrap CSS `__. It provides the background, border, text and link colors used across the application. +The default color palette is set in the `Bootstrap CSS `__. It provides the background, border, text and link colors used across the application. Brand Colors @@ -138,7 +138,7 @@ We use our brand color, a custom burnt orange ``{color:#C55B28;}``, which is set Text Colors ----------- -Text color is the default setting from `Bootstrap CSS `__. +Text color is the default setting from `Bootstrap CSS `__. .. code-block:: css @@ -163,7 +163,7 @@ Text color is the default setting from `Bootstrap CSS `__. The hover state color is set to 15% darker. +Link color is the default setting from `Bootstrap CSS `__. The hover state color is set to 15% darker. **Please note**, there is a CSS override issue with the link color due to the use of both a Bootstrap stylesheet and a PrimeFaces stylesheet in the UI. We've added CSS such as ``.ui-widget-content a {color: #428BCA;}`` to our stylesheet to keep the link color consistent. @@ -204,7 +204,7 @@ Link color is the default setting from `Bootstrap CSS `__ can be used to style background and text colors. Semantic colors include various colors assigned to meaningful contextual values. We convey meaning through color with a handful of emphasis utility classes. +Contextual classes from `Bootstrap CSS `__ can be used to style background and text colors. Semantic colors include various colors assigned to meaningful contextual values. We convey meaning through color with a handful of emphasis utility classes. .. raw:: html @@ -259,7 +259,7 @@ We use various icons across the application, which we get from Bootstrap, FontCu Bootstrap Glyphicons -------------------- -There are over 250 glyphs in font format from the Glyphicon Halflings set provided by `Bootstrap `__. We utilize these mainly as icons inside of buttons and in message blocks. +There are over 250 glyphs in font format from the Glyphicon Halflings set provided by `Bootstrap `__. We utilize these mainly as icons inside of buttons and in message blocks. .. raw:: html @@ -305,7 +305,7 @@ The :doc:`/developers/fontcustom` section of the Developer Guide explains how to Socicon Icon Font ----------------- -We use `Socicon `__ for our custom social icons. In the footer we use icons for Twitter and Github. In our Share feature, we also use custom social icons to allow users to select from a list of social media channels. +We use `Socicon `__ for our custom social icons. In the footer we use icons for Twitter and Github. In our Share feature, we also use custom social icons to allow users to select from a list of social media channels. .. 
raw:: html diff --git a/doc/sphinx-guides/source/style/patterns.rst b/doc/sphinx-guides/source/style/patterns.rst index e96f17dc2ec..c6602ffa26e 100644 --- a/doc/sphinx-guides/source/style/patterns.rst +++ b/doc/sphinx-guides/source/style/patterns.rst @@ -1,7 +1,7 @@ Patterns ++++++++ -Patterns are what emerge when using the foundation elements together with basic objects like buttons and alerts, more complex Javascript components from `Bootstrap `__ like tooltips and dropdowns, and AJAX components from `PrimeFaces `__ like datatables and commandlinks. +Patterns are what emerge when using the foundation elements together with basic objects like buttons and alerts, more complex Javascript components from `Bootstrap `__ like tooltips and dropdowns, and AJAX components from `PrimeFaces `__ like datatables and commandlinks. .. contents:: |toctitle| :local: @@ -9,7 +9,7 @@ Patterns are what emerge when using the foundation elements together with basic Navbar ====== -The `Navbar component `__ from Bootstrap spans the top of the application and contains the logo/branding, aligned to the left, plus search form and links, aligned to the right. +The `Navbar component `__ from Bootstrap spans the top of the application and contains the logo/branding, aligned to the left, plus search form and links, aligned to the right. When logged in, the account name is a dropdown menu, linking the user to account-specific content and the log out link. @@ -74,7 +74,7 @@ When logged in, the account name is a dropdown menu, linking the user to account Breadcrumbs =========== -The breadcrumbs are displayed under the header, and provide a trail of links for users to navigate the hierarchy of containing objects, from file to dataset to Dataverse collection. It utilizes a JSF `repeat component `_ to iterate through the breadcrumbs. +The breadcrumbs are displayed under the header, and provide a trail of links for users to navigate the hierarchy of containing objects, from file to dataset to Dataverse collection. It utilizes a JSF `repeat component `_ to iterate through the breadcrumbs. .. raw:: html @@ -108,7 +108,7 @@ The breadcrumbs are displayed under the header, and provide a trail of links for Tables ====== -Most tables use the `DataTable components `__ from PrimeFaces and are styled using the `Tables component `__ from Bootstrap. +Most tables use the `DataTable components `__ from PrimeFaces and are styled using the `Tables component `__ from Bootstrap. .. raw:: html @@ -187,7 +187,7 @@ Most tables use the `DataTable components `__ from Bootstrap. Form elements like the `InputText component `__ from PrimeFaces are kept looking clean and consistent across each page. +Forms fulfill various functions across the site, but we try to style them consistently. We use the ``.form-horizontal`` layout, which uses ``.form-group`` to create a grid of rows for the labels and inputs. The consistent style of forms is maintained using the `Forms component `__ from Bootstrap. Form elements like the `InputText component `__ from PrimeFaces are kept looking clean and consistent across each page. .. raw:: html @@ -289,7 +289,7 @@ Here are additional form elements that are common across many pages, including r Buttons ======= -There are various types of buttons for various actions, so we have many components to use, including the `CommandButton component `__ and `CommandLink component `__ from PrimeFaces, as well as the basic JSF `Link component `__ and `OutputLink component `__. 
Those are styled using the `Buttons component `__, `Button Groups component `__ and `Buttons Dropdowns component `__ from Bootstrap. +There are various types of buttons for various actions, so we have many components to use, including the `CommandButton component `__ and `CommandLink component `__ from PrimeFaces, as well as the basic JSF `Link component `__ and `OutputLink component `__. Those are styled using the `Buttons component `__, `Button Groups component `__ and `Buttons Dropdowns component `__ from Bootstrap. Action Buttons -------------- @@ -668,7 +668,7 @@ Another variation of icon-only buttons uses the ``.btn-link`` style class from B Pagination ========== -We use the `Pagination component `__ from Bootstrap for paging through search results. +We use the `Pagination component `__ from Bootstrap for paging through search results. .. raw:: html @@ -738,7 +738,7 @@ We use the `Pagination component `__ from Bootstrap is used for publication status (DRAFT, In Review, Unpublished, Deaccessioned), and Dataset version, as well as Tabular Data Tags (Survey, Time Series, Panel, Event, Genomics, Network, Geospatial). +The `Labels component `__ from Bootstrap is used for publication status (DRAFT, In Review, Unpublished, Deaccessioned), and Dataset version, as well as Tabular Data Tags (Survey, Time Series, Panel, Event, Genomics, Network, Geospatial). .. raw:: html @@ -768,7 +768,7 @@ The `Labels component `__ from Boots Alerts ====== -For our help/information, success, warning, and error message blocks we use a custom built UI component based on the `Alerts component `__ from Bootstrap. +For our help/information, success, warning, and error message blocks we use a custom built UI component based on the `Alerts component `__ from Bootstrap. .. raw:: html @@ -859,9 +859,9 @@ Style classes can be added to ``p``, ``div``, ``span`` and other elements to add Images ====== -For images, we use the `GraphicImage component `__ from PrimeFaces, or the basic JSF `GraphicImage component `__. +For images, we use the `GraphicImage component `__ from PrimeFaces, or the basic JSF `GraphicImage component `__. -To display images in a responsive way, they are styled with ``.img-responsive``, an `Images CSS class `__ from Bootstrap. +To display images in a responsive way, they are styled with ``.img-responsive``, an `Images CSS class `__ from Bootstrap. .. raw:: html @@ -879,7 +879,7 @@ To display images in a responsive way, they are styled with ``.img-responsive``, Panels ====== -The most common of our containers, the `Panels component `__ from Bootstrap is used to add a border and padding around sections of content like metadata blocks. Displayed with a header and/or footer, it can also be used with the `Collapse plugin `__ from Bootstrap. +The most common of our containers, the `Panels component `__ from Bootstrap is used to add a border and padding around sections of content like metadata blocks. Displayed with a header and/or footer, it can also be used with the `Collapse plugin `__ from Bootstrap. .. raw:: html @@ -943,7 +943,7 @@ Tabs Tabs are used to provide content panes on a page that allow the user to view different sections of content without navigating to a different page. -We use the `TabView component `__ from PrimeFaces, which is styled using the `Tab component `__ from Bootstrap. +We use the `TabView component `__ from PrimeFaces, which is styled using the `Tab component `__ from Bootstrap. .. 
raw:: html @@ -989,7 +989,7 @@ Modals are dialog prompts that act as popup overlays, but don't create a new bro Buttons usually provide the UI prompt. A user clicks the button, which then opens a `Dialog component `__ or `Confirm Dialog component `__ from PrimeFaces that displays the modal with the necessary information and actions to take. -The modal is styled using the `Modal component `__ from Bootstrap, for a popup window that prompts a user for information, with overlay and a backdrop, then header, content, and buttons. We can use style classes from Bootstrap for large (``.bs-example-modal-lg``) and small (``.bs-example-modal-sm``) width options. +The modal is styled using the `Modal component `__ from Bootstrap, for a popup window that prompts a user for information, with overlay and a backdrop, then header, content, and buttons. We can use style classes from Bootstrap for large (``.bs-example-modal-lg``) and small (``.bs-example-modal-sm``) width options. .. raw:: html diff --git a/doc/sphinx-guides/source/user/account.rst b/doc/sphinx-guides/source/user/account.rst index 675bae90e5d..81c416bafd1 100755 --- a/doc/sphinx-guides/source/user/account.rst +++ b/doc/sphinx-guides/source/user/account.rst @@ -109,7 +109,7 @@ If you are leaving your institution and need to convert your Dataverse installat ORCID Log In ~~~~~~~~~~~~~ -You can set up your Dataverse installation account to allow you to log in using your ORCID credentials. ORCID® is an independent non-profit effort to provide an open registry of unique researcher identifiers and open services to link research activities and organizations to these identifiers. Learn more at `orcid.org `_. +You can set up your Dataverse installation account to allow you to log in using your ORCID credentials. ORCID® is an independent non-profit effort to provide an open registry of unique researcher identifiers and open services to link research activities and organizations to these identifiers. Learn more at `orcid.org `_. Create a Dataverse installation account using ORCID ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index 7d60054ae17..878da96475a 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -22,13 +22,13 @@ Supported Metadata Detailed below are what metadata schemas we support for Citation and Domain Specific Metadata in the Dataverse Project: -- `Citation Metadata `__ (`see .tsv version `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 3.1 `__, and Dublin Core's `DCMI Metadata Terms `__ . Language field uses `ISO 639-1 `__ controlled vocabulary. -- `Geospatial Metadata `__ (`see .tsv version `__): compliant with DDI Lite, DDI 2.5 Codebook, DataCite, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. +- `Citation Metadata `__ (`see .tsv version `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 3.1 `__, and Dublin Core's `DCMI Metadata Terms `__ . Language field uses `ISO 639-1 `__ controlled vocabulary. +- `Geospatial Metadata `__ (`see .tsv version `__): compliant with DDI Lite, DDI 2.5 Codebook, DataCite, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. - `Social Science & Humanities Metadata `__ (`see .tsv version `__): compliant with DDI Lite, DDI 2.5 Codebook, and Dublin Core. 
- `Astronomy and Astrophysics Metadata `__ (`see .tsv version `__): These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) - `VOResource Schema format `__ and is based on - `Virtual Observatory (VO) Discovery and Provenance Metadata `__ (`see .tsv version `__). -- `Life Sciences Metadata `__ (`see .tsv version `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. + `VOResource Schema format `__ and is based on + `Virtual Observatory (VO) Discovery and Provenance Metadata `__. +- `Life Sciences Metadata `__ (`see .tsv version `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. - `Journal Metadata `__ (`see .tsv version `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. Experimental Metadata @@ -37,7 +37,7 @@ Experimental Metadata Unlike supported metadata, experimental metadata is not enabled by default in a new Dataverse installation. Feedback via any `channel `_ is welcome! - `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) -- `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. +- `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. Please note: these custom metadata schemas are not included in the Solr schema for indexing by default, you will need to add them as necessary for your custom metadata blocks. See "Update the Solr Schema" in :doc:`../admin/metadatacustomization`. diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 3b5b4ec6ba8..d3faf479deb 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -200,6 +200,7 @@ Previewers are available for the following file types: - Text - PDF +- Markdown - Tabular (CSV, Excel, etc., see :doc:`tabulardataingest/index`) - Code (R, etc.) - Images (PNG, GIF, JPG) @@ -227,9 +228,8 @@ Additional download options available for tabular data (found in the same drop-d - As tab-delimited data (with the variable names in the first row); - The original file uploaded by the user; - Saved as R data (if the original file was not in R format); -- Variable Metadata (as a `DDI Codebook `_ XML file); -- Data File Citation (currently in either RIS, EndNote XML, or BibTeX format). - +- Variable Metadata (as a `DDI Codebook `_ XML file); +- Data File Citation (currently in either RIS, EndNote XML, or BibTeX format). Differentially Private (DP) Metadata can also be accessed for restricted tabular files if the data depositor has created a DP Metadata Release. See :ref:`dp-release-create` for more information. @@ -336,7 +336,7 @@ You can also search for files within datasets that have been tagged as "Workflow Astronomy (FITS) ---------------- -Metadata found in the header section of `Flexible Image Transport System (FITS) files `_ are automatically extracted by the Dataverse Software, aggregated and displayed in the Astronomy Domain-Specific Metadata of the Dataset that the file belongs to. 
This FITS file metadata, is therefore searchable and browsable (facets) at the Dataset-level. +Metadata found in the header section of `Flexible Image Transport System (FITS) files `_ are automatically extracted by the Dataverse Software, aggregated and displayed in the Astronomy Domain-Specific Metadata of the Dataset that the file belongs to. This FITS file metadata, is therefore searchable and browsable (facets) at the Dataset-level. .. _geojson: @@ -495,7 +495,7 @@ Choosing a License ------------------ Each Dataverse installation provides a set of license(s) data can be released under, and whether users can specify custom terms instead (see below). -One of the available licenses (often the `Creative Commons CC0 Public Domain Dedication `_) serves as the default if you do not make an explicit choice. +One of the available licenses (often the `Creative Commons CC0 Public Domain Dedication `_) serves as the default if you do not make an explicit choice. If you want to apply one of the other available licenses to your dataset, you can change it on the Terms tab of your Dataset page. License Selection and Professional Norms @@ -566,7 +566,7 @@ This is where you will enable a particular Guestbook for your dataset, which is Roles & Permissions =================== -Dataverse installation user accounts can be granted roles that define which actions they are allowed to take on specific Dataverse collections, datasets, and/or files. Each role comes with a set of permissions, which define the specific actions that users may take. +Dataverse installation user accounts can be granted roles that define which actions they are allowed to take on specific Dataverse collections, datasets, and/or files. Each role comes with a set of permissions, which define the specific actions that users may take. It is not possible to grant a role that comes with a permission that the granting user themselves does not have. Roles and permissions may also be granted to groups. Groups can be defined as a set of Dataverse user accounts, a collection of IP addresses (e.g. all users of a library's computers), or a collection of all users who log in using a particular institutional login (e.g. everyone who logs in with a particular university's account credentials). @@ -783,7 +783,7 @@ The "Compute" button on dataset and file pages will allow you to compute on a si Cloud Storage Access -------------------- -If you need to access a dataset in a more flexible way than the Compute button provides, then you can use the Cloud Storage Access box on the dataset page to copy the dataset's container name. This unique identifer can then be used to allow direct access to the dataset. +If you need to access a dataset in a more flexible way than the Compute button provides, then you can use the Cloud Storage Access box on the dataset page to copy the dataset's container name. This unique identifier can then be used to allow direct access to the dataset. .. _deaccession: diff --git a/doc/sphinx-guides/source/user/dataverse-management.rst b/doc/sphinx-guides/source/user/dataverse-management.rst index b5e8d8f4fc9..0e0fbcc0883 100755 --- a/doc/sphinx-guides/source/user/dataverse-management.rst +++ b/doc/sphinx-guides/source/user/dataverse-management.rst @@ -25,6 +25,8 @@ Creating a Dataverse collection is easy but first you must be a registered user * **Category**: Select a category that best describes the type of Dataverse collection this will be. 
For example, if this is a Dataverse collection for an individual researcher's datasets, select *Researcher*. If this is a Dataverse collection for an institution, select *Organization or Institution*. * **Email**: This is the email address that will be used as the contact for this particular Dataverse collection. You can have more than one contact email address for your Dataverse collection. * **Description**: Provide a description of this Dataverse collection. This will display on the landing page of your Dataverse collection and in the search result list. The description field supports certain HTML tags, if you'd like to format your text.
+ * **Dataset Metadata Language**: (If enabled) Select which language should be used when entering dataset metadata, or leave that choice to dataset creators. + * **Guestbook Entry Option**: (If enabled) Select whether guestbooks are displayed when a user requests access to restricted file(s) or when they initiate a download. #. **Choose the sets of Metadata Fields for datasets in this Dataverse collection**: * By default the metadata elements will be from the host Dataverse collection that this new Dataverse collection is created in. * The Dataverse Software offers metadata standards for multiple domains. To learn more about the metadata standards in the Dataverse Software please check out the :doc:`/user/appendix`. @@ -212,7 +214,7 @@ Dataset linking allows a Dataverse collection owner to "link" their Dataverse co For example, researchers working on a collaborative study across institutions can each link their own individual institutional Dataverse collections to the one collaborative dataset, making it easier for interested parties from each institution to find the study. -In order to link a dataset, you will need your account to have the "Add Dataset" permission on the Dataverse collection that is doing the linking. If you created the Dataverse collection then you should have this permission already, but if not then you will need to ask the admin of that Dataverse collection to assign that permission to your account. You do not need any special permissions on the dataset being linked. +In order to link a dataset, you will need your account to have the "Publish Dataset" permission on the Dataverse collection that is doing the linking. If you created the Dataverse collection then you should have this permission already, but if not then you will need to ask the admin of that Dataverse collection to assign that permission to your account. You do not need any special permissions on the dataset being linked. To link a dataset to your Dataverse collection, you must navigate to that dataset and click the white "Link" button in the upper-right corner of the dataset page. This will open up a window where you can type in the name of the Dataverse collection that you would like to link the dataset to. Select your Dataverse collection and click the save button. This will establish the link, and the dataset will now appear under your Dataverse collection. diff --git a/doc/sphinx-guides/source/user/find-use-data.rst b/doc/sphinx-guides/source/user/find-use-data.rst index 2e82a1482b4..bea23cbcd0e 100755 --- a/doc/sphinx-guides/source/user/find-use-data.rst +++ b/doc/sphinx-guides/source/user/find-use-data.rst @@ -71,7 +71,7 @@ View Files Files in a Dataverse installation each have their own landing page that can be reached through the search results or through the Files table on their parent dataset's page. The dataset page and file page offer much the same functionality in terms of viewing and editing files, with a few small exceptions. -- In installations that have enabled support for persistent identifers (PIDs) at the file level, the file page includes the file's DOI or handle, which can be found in the file citation and also under the Metadata tab. +- In installations that have enabled support for persistent identifiers (PIDs) at the file level, the file page includes the file's DOI or handle, which can be found in the file citation and also under the Metadata tab. - Previewers for several common file types are available and can be added by installation administrators.
- The file page's Versions tab gives you a version history that is more focused on the individual file rather than the dataset as a whole. diff --git a/doc/sphinx-guides/source/user/tabulardataingest/ingestprocess.rst b/doc/sphinx-guides/source/user/tabulardataingest/ingestprocess.rst index f1d5611ede9..1e481a54da6 100644 --- a/doc/sphinx-guides/source/user/tabulardataingest/ingestprocess.rst +++ b/doc/sphinx-guides/source/user/tabulardataingest/ingestprocess.rst @@ -27,12 +27,12 @@ separately, in a relational database, so that it can be accessed efficiently by the application. For the purposes of archival preservation it can be exported, in plain text XML files, using a standardized, open `DDI Codebook -`_ +`_ format. (more info below) Tabular Data and Metadata -========================== +========================= Data vs. Metadata ----------------- @@ -53,6 +53,25 @@ Tabular Metadata in the Dataverse Software The structure of the metadata defining tabular data variables used in the Dataverse Software was originally based on the `DDI Codebook -`_ format. +`_ format. You can see an example of DDI output under the :ref:`data-variable-metadata-access` section of the :doc:`/api/dataaccess` section of the API Guide. + +Uningest and Reingest +===================== + +Ingest will only work for files whose content can be interpreted as a table. +Multi-sheet spreadsheets and CSV files with a different number of entries per row are two examples where ingest will fail. +This is non-fatal. The Dataverse software will not produce a .tab version of the file and will show a warning to users +who can see the draft version of the dataset containing the file, indicating why ingest failed. When the file is published as +part of the dataset, there will be no indication that ingest was attempted and failed. + +If the warning message is a concern, the Dataverse software includes both an API call (see :ref:`file-uningest` in the :doc:`/api/native-api` guide) +and an Edit/Uningest menu option displayed on the file page that allow a file to be uningested by anyone who can publish the dataset. + +Uningest will remove the warning. Uningest can also be done for a file that was successfully ingested. This is only available to superusers. +This will remove the variable-level metadata and the .tab version of the file that was generated. + +If a file is a tabular format but was never ingested, e.g. due to the ingest file size limit being lower in the past, or if ingest had failed, +e.g. in a prior Dataverse version, a reingest API (see :ref:`file-reingest` in the :doc:`/api/native-api` guide) and a file page Edit/Reingest option +in the user interface allow ingest to be tried again. As with Uningest, this functionality is only available to superusers. diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index 2000a2097f0..2cf7f46dc5e 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -7,7 +7,8 @@ Dataverse Software Documentation Versions This list provides a way to refer to the documentation for previous and future versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. - pre-release `HTML (not final!) `__ and `PDF (experimental!) `__ built from the :doc:`develop ` branch :doc:`(how to contribute!)
` -- 6.0 +- 6.1 +- `6.0 `__ - `5.14 `__ - `5.13 `__ - `5.12.1 `__ diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index ab44dbc1806..ae0aa2bdf76 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -9,14 +9,54 @@ services: restart: on-failure user: payara environment: - - DATAVERSE_DB_HOST=postgres - - DATAVERSE_DB_PASSWORD=secret - - DATAVERSE_DB_USER=${DATAVERSE_DB_USER} - - ENABLE_JDWP=1 - - DATAVERSE_FEATURE_API_BEARER_AUTH=1 + DATAVERSE_DB_HOST: postgres + DATAVERSE_DB_PASSWORD: secret + DATAVERSE_DB_USER: ${DATAVERSE_DB_USER} + ENABLE_JDWP: "1" + ENABLE_RELOAD: "1" + SKIP_DEPLOY: "${SKIP_DEPLOY}" + DATAVERSE_JSF_REFRESH_PERIOD: "1" + DATAVERSE_FEATURE_API_BEARER_AUTH: "1" + DATAVERSE_AUTH_OIDC_ENABLED: "1" + DATAVERSE_AUTH_OIDC_CLIENT_ID: test + DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 + DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test + # These two oai settings are here to get HarvestingServerIT to pass + dataverse_oai_server_maxidentifiers: "2" + dataverse_oai_server_maxrecords: "2" + JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 + -Ddataverse.files.file1.type=file + -Ddataverse.files.file1.label=Filesystem + -Ddataverse.files.file1.directory=${STORAGE_DIR}/store + -Ddataverse.files.localstack1.type=s3 + -Ddataverse.files.localstack1.label=LocalStack + -Ddataverse.files.localstack1.custom-endpoint-url=http://localstack:4566 + -Ddataverse.files.localstack1.custom-endpoint-region=us-east-2 + -Ddataverse.files.localstack1.bucket-name=mybucket + -Ddataverse.files.localstack1.path-style-access=true + -Ddataverse.files.localstack1.upload-redirect=true + -Ddataverse.files.localstack1.download-redirect=true + -Ddataverse.files.localstack1.access-key=default + -Ddataverse.files.localstack1.secret-key=default + -Ddataverse.files.minio1.type=s3 + -Ddataverse.files.minio1.label=MinIO + -Ddataverse.files.minio1.custom-endpoint-url=http://minio:9000 + -Ddataverse.files.minio1.custom-endpoint-region=us-east-1 + -Ddataverse.files.minio1.bucket-name=mybucket + -Ddataverse.files.minio1.path-style-access=true + -Ddataverse.files.minio1.upload-redirect=false + -Ddataverse.files.minio1.download-redirect=false + -Ddataverse.files.minio1.access-key=4cc355_k3y + -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k3y + -Ddataverse.pid.providers=fake + -Ddataverse.pid.default-provider=fake + -Ddataverse.pid.fake.type=FAKE + -Ddataverse.pid.fake.label=FakeDOIProvider + -Ddataverse.pid.fake.authority=10.5072 + -Ddataverse.pid.fake.shoulder=FK2/ ports: - "8080:8080" # HTTP (Dataverse Application) - - "4848:4848" # HTTP (Payara Admin Console) + - "4949:4848" # HTTPS (Payara Admin Console) - "9009:9009" # JDWP - "8686:8686" # JMX networks: @@ -24,9 +64,11 @@ services: depends_on: - dev_postgres - dev_solr + - dev_dv_initializer volumes: - ./docker-dev-volumes/app/data:/dv - ./docker-dev-volumes/app/secrets:/secrets + - ./target/dataverse:/opt/payara/deployments/dataverse:ro tmpfs: - /dumps:mode=770,size=2052M,uid=1000,gid=1000 - /tmp:mode=770,size=2052M,uid=1000,gid=1000 @@ -44,6 +86,17 @@ services: networks: - dataverse + dev_dv_initializer: + container_name: "dev_dv_initializer" + image: gdcc/configbaker:unstable + restart: "no" + command: + - sh + - -c + - "fix-fs-perms.sh dv" + volumes: + - ./docker-dev-volumes/app/data:/dv + dev_postgres: container_name: "dev_postgres" hostname: postgres @@ -109,8 +162,8 @@ services: - /mail:mode=770,size=128M,uid=1000,gid=1000 dev_keycloak: - container_name: "dev_keycloack" - 
image: 'quay.io/keycloak/keycloak:19.0' + container_name: "dev_keycloak" + image: 'quay.io/keycloak/keycloak:21.0' hostname: keycloak environment: - KEYCLOAK_ADMIN=kcadmin @@ -125,7 +178,60 @@ services: ports: - "8090:8090" volumes: - - './conf/keycloak/oidc-realm.json:/opt/keycloak/data/import/oidc-realm.json' + - './conf/keycloak/test-realm.json:/opt/keycloak/data/import/test-realm.json' + + # This proxy configuration is only intended to be used for development purposes! + # DO NOT USE IN PRODUCTION! HIGH SECURITY RISK! + dev_proxy: + image: caddy:2-alpine + # The command below is enough to enable using the admin gui, but it will not rewrite location headers to HTTP. + # To achieve rewriting from https:// to http://, we need a simple configuration file + #command: ["caddy", "reverse-proxy", "-f", ":4848", "-t", "https://dataverse:4848", "--insecure"] + command: ["caddy", "run", "-c", "/Caddyfile"] + ports: + - "4848:4848" # Will expose Payara Admin Console (HTTPS) as HTTP + restart: always + volumes: + - ./conf/proxy/Caddyfile:/Caddyfile:ro + depends_on: + - dev_dataverse + networks: + - dataverse + + dev_localstack: + container_name: "dev_localstack" + hostname: "localstack" + image: localstack/localstack:2.3.2 + restart: on-failure + ports: + - "127.0.0.1:4566:4566" + environment: + - DEBUG=${DEBUG-} + - DOCKER_HOST=unix:///var/run/docker.sock + - HOSTNAME_EXTERNAL=localstack + networks: + - dataverse + volumes: + - ./conf/localstack:/etc/localstack/init/ready.d + tmpfs: + - /localstack:mode=770,size=128M,uid=1000,gid=1000 + + dev_minio: + container_name: "dev_minio" + hostname: "minio" + image: minio/minio + restart: on-failure + ports: + - "9000:9000" + - "9001:9001" + networks: + - dataverse + volumes: + - ./docker-dev-volumes/minio_storage:/data + environment: + MINIO_ROOT_USER: 4cc355_k3y + MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y + command: server /data networks: dataverse: diff --git a/docker/compose/demo/.gitignore b/docker/compose/demo/.gitignore new file mode 100644 index 00000000000..1269488f7fb --- /dev/null +++ b/docker/compose/demo/.gitignore @@ -0,0 +1 @@ +data diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml new file mode 100644 index 00000000000..e4bcc9778d7 --- /dev/null +++ b/docker/compose/demo/compose.yml @@ -0,0 +1,138 @@ +version: "2.4" + +services: + + dataverse: + container_name: "dataverse" + hostname: dataverse + image: gdcc/dataverse:alpha + restart: on-failure + user: payara + environment: + _CT_DATAVERSE_SITEURL: "https://demo.example.org" + DATAVERSE_DB_HOST: postgres + DATAVERSE_DB_PASSWORD: secret + DATAVERSE_DB_USER: dataverse + DATAVERSE_FEATURE_API_BEARER_AUTH: "1" + JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 + -Ddataverse.files.file1.type=file + -Ddataverse.files.file1.label=Filesystem + -Ddataverse.files.file1.directory=${STORAGE_DIR}/store + -Ddataverse.pid.providers=fake + -Ddataverse.pid.default-provider=fake + -Ddataverse.pid.fake.type=FAKE + -Ddataverse.pid.fake.label=FakeDOIProvider + -Ddataverse.pid.fake.authority=10.5072 + -Ddataverse.pid.fake.shoulder=FK2/ + ports: + - "8080:8080" # HTTP (Dataverse Application) + - "4848:4848" # HTTP (Payara Admin Console) + - "9009:9009" # JDWP + - "8686:8686" # JMX + networks: + - dataverse + depends_on: + - postgres + - solr + - dv_initializer + volumes: + - ./data/app/data:/dv + - ./data/app/secrets:/secrets + tmpfs: + - /dumps:mode=770,size=2052M,uid=1000,gid=1000 + - /tmp:mode=770,size=2052M,uid=1000,gid=1000 + mem_limit: 2147483648 # 2 GiB + mem_reservation: 
1024m + privileged: false + + bootstrap: + container_name: "bootstrap" + image: gdcc/configbaker:alpha + restart: "no" + command: + - bootstrap.sh + - dev + #- demo + #volumes: + # - ./demo:/scripts/bootstrap/demo + networks: + - dataverse + + dv_initializer: + container_name: "dv_initializer" + image: gdcc/configbaker:alpha + restart: "no" + command: + - sh + - -c + - "fix-fs-perms.sh dv" + volumes: + - ./data/app/data:/dv + + postgres: + container_name: "postgres" + hostname: postgres + image: postgres:13 + restart: on-failure + environment: + - POSTGRES_USER=dataverse + - POSTGRES_PASSWORD=secret + ports: + - "5432:5432" + networks: + - dataverse + volumes: + - ./data/postgresql/data:/var/lib/postgresql/data + + solr_initializer: + container_name: "solr_initializer" + image: gdcc/configbaker:alpha + restart: "no" + command: + - sh + - -c + - "fix-fs-perms.sh solr && cp -a /template/* /solr-template" + volumes: + - ./data/solr/data:/var/solr + - ./data/solr/conf:/solr-template + + solr: + container_name: "solr" + hostname: "solr" + image: solr:9.3.0 + depends_on: + - solr_initializer + restart: on-failure + ports: + - "8983:8983" + networks: + - dataverse + command: + - "solr-precreate" + - "collection1" + - "/template" + volumes: + - ./data/solr/data:/var/solr + - ./data/solr/conf:/template + + smtp: + container_name: "smtp" + hostname: "smtp" + image: maildev/maildev:2.0.5 + restart: on-failure + ports: + - "25:25" # smtp server + - "1080:1080" # web ui + environment: + - MAILDEV_SMTP_PORT=25 + - MAILDEV_MAIL_DIRECTORY=/mail + networks: + - dataverse + #volumes: + # - ./data/smtp/data:/mail + tmpfs: + - /mail:mode=770,size=128M,uid=1000,gid=1000 + +networks: + dataverse: + driver: bridge diff --git a/docker/util/intellij/README.md b/docker/util/intellij/README.md new file mode 100644 index 00000000000..281d0e50ea6 --- /dev/null +++ b/docker/util/intellij/README.md @@ -0,0 +1,13 @@ +# IntelliJ Auto-Copy of Webapp Files + +When deploying the webapp via Payara Tools, you can use this tool to immediately copy changes to non-code files into the running deployment, instantly seeing changes in your browser. + +Note: as this relies on using a Bash shell script, it is pretty much limited to Mac and Linux. +Feel free to extend and provide a PowerShell equivalent! + +1. Install the [File Watcher plugin](https://plugins.jetbrains.com/plugin/7177-file-watchers) +2. Import the [watchers.xml](./watchers.xml) file at *File > Settings > Tools > File Watchers* +3. Once you have the deployment running (see Container Guides), editing files at `src/main/webapp` will be copied into the deployment after saving the edited file. 
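As a brief illustration of how the copy script added below is meant to be driven: it takes the project directory and the absolute path of the saved file as its two arguments (in IntelliJ the File Watcher typically supplies these via macros such as $ProjectFileDir$ and $FilePath$). A manual invocation might look like the following sketch, assuming the dev containers from docker-compose-dev.yml are running; the file name is only an example.

# Copy one edited webapp file into the running dev_dataverse container
./docker/util/intellij/cpwebapp.sh "$(pwd)" "$(pwd)/src/main/webapp/dataset.xhtml"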
+ +Alternatively, you can add an External tool and trigger via menu or shortcut to do the copying manually: +https://www.jetbrains.com/help/idea/configuring-third-party-tools.html diff --git a/docker/util/intellij/cpwebapp.sh b/docker/util/intellij/cpwebapp.sh new file mode 100755 index 00000000000..2d08fb1a873 --- /dev/null +++ b/docker/util/intellij/cpwebapp.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# +# cpwebapp +# + +set -eu + +PROJECT_DIR="$1" +FILE_TO_COPY="$2" +RELATIVE_PATH="${FILE_TO_COPY#"${PROJECT_DIR}/"}" + +# Check if RELATIVE_PATH starts with 'src/main/webapp', otherwise ignore +if [[ "$RELATIVE_PATH" == "src/main/webapp"* ]]; then + # Extract version from POM, so we don't need to have Maven on the PATH + VERSION=$(grep -oPm1 "(?<=<revision>)[^<]+" "$PROJECT_DIR/modules/dataverse-parent/pom.xml") + + # Construct the target path by cutting off the local prefix and prepend with in-container path + RELATIVE_PATH_WITHOUT_WEBAPP="${RELATIVE_PATH#src/main/webapp/}" + TARGET_PATH="/opt/payara/appserver/glassfish/domains/domain1/applications/dataverse-$VERSION/${RELATIVE_PATH_WITHOUT_WEBAPP}" + + # Copy file to container + docker cp "$FILE_TO_COPY" "dev_dataverse:$TARGET_PATH" +fi diff --git a/docker/util/intellij/watchers.xml b/docker/util/intellij/watchers.xml new file mode 100644 index 00000000000..4ccee125ec2 --- /dev/null +++ b/docker/util/intellij/watchers.xml @@ -0,0 +1,22 @@ + + + + + \ No newline at end of file diff --git a/mdc-logs/raw-mdc-2019-01-07.log b/mdc-logs/raw-mdc-2019-01-07.log deleted file mode 100644 index d7a6386160e..00000000000 --- a/mdc-logs/raw-mdc-2019-01-07.log +++ /dev/null @@ -1,6 +0,0 @@ -#Fields: event_time client_ip session_cookie_id user_cookie_id user_id request_url identifier filename size user-agent title publisher publisher_id authors publication_date version other_id target_url publication_year -2019-01-07T15:14:51-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:15:15-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:04-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:14-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV 168298bae7c-2c5bbc1a9c8c 1 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew,
Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:19-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV 168298bb8ce-337d8df49763 4026 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 97aa4cd2792..f093ced37c1 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -22,7 +22,7 @@ # # Make the Java base image and version configurable (useful for trying newer Java versions and flavors) -ARG JAVA_IMAGE="eclipse-temurin:11-jre" +ARG JAVA_IMAGE="eclipse-temurin:17-jre" FROM $JAVA_IMAGE # Default payara ports to expose @@ -49,8 +49,6 @@ ENV PAYARA_DIR="${HOME_DIR}/appserver" \ ENV PATH="${PATH}:${PAYARA_DIR}/bin:${SCRIPT_DIR}" \ DOMAIN_DIR="${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" \ DEPLOY_PROPS="" \ - PREBOOT_COMMANDS="${CONFIG_DIR}/pre-boot-commands.asadmin" \ - POSTBOOT_COMMANDS="${CONFIG_DIR}/post-boot-commands.asadmin" \ JVM_ARGS="" \ MEM_MAX_RAM_PERCENTAGE="70.0" \ MEM_XSS="512k" \ @@ -65,7 +63,8 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin:${SCRIPT_DIR}" \ JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" \ ENABLE_JMX=0 \ ENABLE_JDWP=0 \ - ENABLE_RELOAD=0 + ENABLE_RELOAD=0 \ + SKIP_DEPLOY=0 ### PART 1: SYSTEM ### ARG UID=1000 @@ -84,8 +83,11 @@ RUN <> "$POSTBOOT_COMMANDS"; + if [ -n "$SKIP_DEPLOY" ] && { [ "$SKIP_DEPLOY" = "1" ] || [ "$SKIP_DEPLOY" = "true" ]; }; then + echo "Skipping deployment of $1 as requested."; + else + echo "Adding deployment target $1 to post boot commands"; + echo "$DEPLOY_STATEMENT" >> "$POSTBOOT_COMMANDS"; + fi fi } diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh index bb0984332f7..016151168d5 100644 --- a/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh @@ -16,7 +16,7 @@ ENABLE_JMX=${ENABLE_JMX:-0} ENABLE_JDWP=${ENABLE_JDWP:-0} ENABLE_RELOAD=${ENABLE_RELOAD:-0} -DV_PREBOOT=${PAYARA_DIR}/dataverse_preboot +DV_PREBOOT=${CONFIG_DIR}/dataverse_preboot echo "# Dataverse preboot configuration for Payara" > "${DV_PREBOOT}" # 1. Configure JMX (enabled by default on port 8686, but requires SSL) @@ -56,6 +56,9 @@ fi if [ "${ENABLE_RELOAD}" = "1" ]; then echo "Enabling hot reload of deployments." echo "set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled=true" >> "${DV_PREBOOT}" + echo "set configs.config.server-config.admin-service.das-config.autodeploy-enabled=true" >> "${DV_PREBOOT}" + export DATAVERSE_JSF_PROJECT_STAGE=${DATAVERSE_JSF_PROJECT_STAGE:-"Development"} + export DATAVERSE_JSF_REFRESH_PERIOD=${DATAVERSE_JSF_REFRESH_PERIOD:-"0"} fi # 4. 
Add the commands to the existing preboot file, but insert BEFORE deployment diff --git a/modules/container-configbaker/Dockerfile b/modules/container-configbaker/Dockerfile index 564216b3572..9b98334d72b 100644 --- a/modules/container-configbaker/Dockerfile +++ b/modules/container-configbaker/Dockerfile @@ -26,8 +26,12 @@ RUN true && \ # Make our working directories mkdir -p ${SCRIPT_DIR} ${SECRETS_DIR} ${SOLR_TEMPLATE} -# Get in the scripts and make them executable (just in case...) +# Get in the scripts COPY maven/scripts maven/solr/update-fields.sh ${SCRIPT_DIR}/ +# Copy the data from scripts/api that provide the common base setup you'd get from the installer. +# ".dockerignore" will take care of taking only the bare necessities +COPY maven/setup ${SCRIPT_DIR}/bootstrap/base/ +# Make the scripts executable RUN chmod +x ${SCRIPT_DIR}/*.sh ${BOOTSTRAP_DIR}/*/*.sh # Copy the Solr config bits @@ -35,9 +39,6 @@ COPY --from=solr /opt/solr/server/solr/configsets/_default ${SOLR_TEMPLATE}/ COPY maven/solr/*.xml ${SOLR_TEMPLATE}/conf/ RUN rm ${SOLR_TEMPLATE}/conf/managed-schema.xml -# Copy the data from scripts/api that provide the common base setup you'd get from the installer. -# ".dockerignore" will take care of taking only the bare necessities -COPY maven/setup ${SCRIPT_DIR}/bootstrap/base/ # Set the entrypoint to tini (as a process supervisor) ENTRYPOINT ["/usr/bin/dumb-init", "--"] diff --git a/modules/container-configbaker/scripts/bootstrap.sh b/modules/container-configbaker/scripts/bootstrap.sh index 1aa9e232953..a00916880db 100644 --- a/modules/container-configbaker/scripts/bootstrap.sh +++ b/modules/container-configbaker/scripts/bootstrap.sh @@ -5,16 +5,17 @@ set -euo pipefail function usage() { - echo "Usage: $(basename "$0") [-h] [-u instanceUrl] [-t timeout] []" + echo "Usage: $(basename "$0") [-h] [-u instanceUrl] [-t timeout] [-e targetEnvFile] []" echo "" echo "Execute initial configuration (bootstrapping) of an empty Dataverse instance." echo -n "Known personas: " find "${BOOTSTRAP_DIR}" -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | paste -sd ' ' echo "" echo "Parameters:" - echo "instanceUrl - Location on container network where to reach your instance. Default: 'http://dataverse:8080'" - echo " timeout - Provide how long to wait for the instance to become available (using wait4x). Default: '2m'" - echo " persona - Configure persona to execute. Calls ${BOOTSTRAP_DIR}//init.sh. Default: 'base'" + echo " instanceUrl - Location on container network where to reach your instance. Default: 'http://dataverse:8080'" + echo " timeout - Provide how long to wait for the instance to become available (using wait4x). Default: '2m'" + echo "targetEnvFile - Path to a file where the bootstrap process can expose information as env vars (e.g. dataverseAdmin's API token)" + echo " persona - Configure persona to execute. Calls ${BOOTSTRAP_DIR}//init.sh. Default: 'base'" echo "" echo "Note: This script will wait for the Dataverse instance to be available before executing the bootstrapping." echo " It also checks if already bootstrapped before (availability of metadata blocks) and skip if true." 
@@ -24,13 +25,15 @@ function usage() { # Set some defaults as documented DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} -TIMEOUT=${TIMEOUT:-"2m"} +TIMEOUT=${TIMEOUT:-"3m"} +TARGET_ENV_FILE=${TARGET_ENV_FILE:-""} -while getopts "u:t:h" OPTION +while getopts "u:t:e:h" OPTION do case "$OPTION" in u) DATAVERSE_URL="$OPTARG" ;; t) TIMEOUT="$OPTARG" ;; + e) TARGET_ENV_FILE="$OPTARG" ;; h) usage;; \?) usage;; esac @@ -54,6 +57,21 @@ if [[ $BLOCK_COUNT -gt 0 ]]; then exit 0 fi +# Provide a space to store environment variables output to +ENV_OUT=$(mktemp) +export ENV_OUT + # Now execute the bootstrapping script echo "Now executing bootstrapping script at ${BOOTSTRAP_DIR}/${PERSONA}/init.sh." -exec "${BOOTSTRAP_DIR}/${PERSONA}/init.sh" +# shellcheck disable=SC1090 +source "${BOOTSTRAP_DIR}/${PERSONA}/init.sh" + +# If the env file option was given, check if the file is writeable and copy content from the temporary file +if [[ -n "${TARGET_ENV_FILE}" ]]; then + if [[ -f "${TARGET_ENV_FILE}" && -w "${TARGET_ENV_FILE}" ]]; then + cat "${ENV_OUT}" > "${TARGET_ENV_FILE}" + else + echo "File ${TARGET_ENV_FILE} not found, is a directory or not writeable" + exit 2 + fi +fi diff --git a/modules/container-configbaker/scripts/bootstrap/demo/init.sh b/modules/container-configbaker/scripts/bootstrap/demo/init.sh new file mode 100644 index 00000000000..e8d1d07dd2d --- /dev/null +++ b/modules/container-configbaker/scripts/bootstrap/demo/init.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +set -euo pipefail + +# Set some defaults +DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} +export DATAVERSE_URL + +BLOCKED_API_KEY=${BLOCKED_API_KEY:-"unblockme"} +export BLOCKED_API_KEY + +# --insecure is used so we can configure a few things but +# later in this script we'll apply the changes as if we had +# run the script without --insecure. +echo "Running base setup-all.sh..." +"${BOOTSTRAP_DIR}"/base/setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out + +echo "" +echo "Setting DOI provider to \"FAKE\"..." +curl -sS -X PUT -d FAKE "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" + +echo "" +echo "Revoke the key that allows for creation of builtin users..." +curl -sS -X DELETE "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" + +echo "" +echo "Set key for accessing blocked API endpoints..." +curl -sS -X PUT -d "$BLOCKED_API_KEY" "${DATAVERSE_URL}/api/admin/settings/:BlockedApiKey" + +echo "" +echo "Set policy to only allow access to admin APIs with a key..." +curl -sS -X PUT -d unblock-key "${DATAVERSE_URL}/api/admin/settings/:BlockedApiPolicy" + +echo "" +echo "Block admin and other sensitive API endpoints..." +curl -sS -X PUT -d 'admin,builtin-users' "${DATAVERSE_URL}/api/admin/settings/:BlockedApiEndpoints" + +echo "" +echo "Done, your instance has been configured for demo or eval. Have a nice day!" diff --git a/modules/container-configbaker/scripts/bootstrap/dev/init.sh b/modules/container-configbaker/scripts/bootstrap/dev/init.sh index 1042478963d..efdaee3d0c3 100644 --- a/modules/container-configbaker/scripts/bootstrap/dev/init.sh +++ b/modules/container-configbaker/scripts/bootstrap/dev/init.sh @@ -17,6 +17,8 @@ curl "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" -X PUT -d FAKE API_TOKEN=$(grep apiToken "/tmp/setup-all.sh.out" | jq ".data.apiToken" | tr -d \") export API_TOKEN +# ${ENV_OUT} comes from bootstrap.sh and will expose the saved information back to the host if enabled. +echo "API_TOKEN=${API_TOKEN}" >> "${ENV_OUT}" echo "Publishing root dataverse..."
curl -H "X-Dataverse-key:$API_TOKEN" -X POST "${DATAVERSE_URL}/api/dataverses/:root/actions/:publish" diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index c45d59e4f5f..fd176c2fd80 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -40,8 +40,8 @@ com.google.cloud - google-cloud-bom - ${google.cloud.version} + libraries-bom + ${google.library.version} pom import @@ -131,7 +131,7 @@ - 6.0 + 6.1 17 UTF-8 @@ -149,42 +149,43 @@ 6.2023.8 - 42.6.0 + 42.7.2 9.3.0 1.12.290 - 0.177.0 + 26.30.0 8.0.0 1.7.35 - 2.11.0 + 2.15.1 1.2 3.12.0 - 1.21 + 1.26.0 4.5.13 4.4.14 - 5.1.0 + 5.2.0 - 1.15.0 + 1.19.0 2.10.1 - 5.10.0 5.4.0 + 0.8.10 9.3 - 3.8.1 - 3.2.2 + 3.11.0 + 3.3.0 3.3.2 - 3.2.0 - 3.0.0-M1 - 3.0.0-M5 - 3.0.0-M5 - 3.3.0 + 3.5.0 + 3.1.1 + 3.1.0 + 3.1.0 + 3.6.0 + 3.3.1 3.0.0-M7 3.0.1 4.0.0-M4 @@ -193,12 +194,12 @@ 1.3.0 3.3.0 - 3.1.2 + 3.2.2 1.6.13 1.7.0 - 0.43.0 + 0.43.4 @@ -254,6 +255,11 @@ maven-failsafe-plugin ${maven-failsafe-plugin.version} + + org.apache.maven.plugins + maven-resources-plugin + ${maven-resources-plugin.version} + org.apache.maven.plugins maven-enforcer-plugin @@ -271,6 +277,11 @@ + + org.jacoco + jacoco-maven-plugin + ${maven-jacoco-plugin.version} + io.fabric8 docker-maven-plugin @@ -415,6 +426,7 @@ https://artifacts.unidata.ucar.edu/repository/unidata-all/ + + --> diff --git a/pom.xml b/pom.xml index 7ba22d2a076..8b2850e1df9 100644 --- a/pom.xml +++ b/pom.xml @@ -20,15 +20,15 @@ false false + integration war 1.2.18.4 - 9.21.2 + 9.22.1 1.20.1 - 0.8.7 5.2.1 2.4.1 5.5.3 @@ -57,7 +57,7 @@ In case the dependency is both transitive and direct (e. g. some common lib for logging), manage the version above and add the direct dependency here WITHOUT version tag, too. --> - + + + com.github.ben-manes.caffeine + caffeine + 3.1.8 + + io.gdcc @@ -459,7 +466,7 @@ org.duracloud common - 7.1.1 + 8.0.0 org.slf4j @@ -474,7 +481,7 @@ org.duracloud storeclient - 7.1.1 + 8.0.0 org.slf4j @@ -599,6 +606,29 @@ postgresql test + + com.github.dasniko + testcontainers-keycloak + 3.0.0 + test + + + org.testcontainers + localstack + test + + + + jakarta.servlet + jakarta.servlet-api + 4.0.4 + test + + org.mockito mockito-core @@ -617,24 +647,23 @@ ${smallrye-mpconfig.version} test + + org.htmlunit + htmlunit + 3.9.0 + test + - - + src/main/java @@ -692,6 +721,7 @@ true false + ${project.build.directory}/${project.artifactId} true @@ -712,24 +742,94 @@ org.jacoco jacoco-maven-plugin - ${jacoco.version} - - ${basedir}/target/coverage-reports/jacoco-unit.exec - ${basedir}/target/coverage-reports/jacoco-unit.exec - jacoco-initialize prepare-agent + + ${project.build.directory}/coverage-reports/jacoco-unit.exec + surefire.jacoco.args + ${skipUnitTests} + + edu/harvard/iq/dataverse/* + io/gdcc/* + org/dataverse/* + + - jacoco-site - package + jacoco-after-unit + test report + + ${project.build.directory}/coverage-reports/jacoco-unit.exec + ${project.reporting.outputDirectory}/jacoco-unit-test-coverage-report + ${skipUnitTests} + + + + jacoco-initialize-it + pre-integration-test + + prepare-agent + + + ${project.build.directory}/coverage-reports/jacoco-integration.exec + failsafe.jacoco.args + ${skipIntegrationTests} + + edu/harvard/iq/dataverse/* + io/gdcc/* + org/dataverse/* + + + + + jacoco-after-it + post-integration-test + + report + + + ${project.build.directory}/coverage-reports/jacoco-integration.exec + ${project.reporting.outputDirectory}/jacoco-integration-test-coverage-report + ${skipIntegrationTests} + + + + jacoco-merge-unit-and-it 
+ post-integration-test + + merge + + + + + ${project.build.directory}/coverage-reports/ + + *.exec + + + + ${project.build.directory}/coverage-reports/merged.exec + ${skipIntegrationTests} + + + + jacoco-report + post-integration-test + + report + + + ${project.build.directory}/coverage-reports/merged.exec + ${project.reporting.outputDirectory}/jacoco-merged-test-coverage-report + ${skipIntegrationTests} + @@ -744,6 +844,9 @@ 2.3.1 + + ${project.reporting.outputDirectory}/jacoco-merged-test-coverage-report/jacoco.xml + org.apache.maven.plugins @@ -753,8 +856,27 @@ ${testsToExclude} ${skipUnitTests} + ${surefire.jacoco.args} ${argLine} + + + org.apache.maven.plugins + maven-failsafe-plugin + + ${it.groups} + ${failsafe.jacoco.args} ${argLine} + ${skipIntegrationTests} + + + + + integration-test + verify + + + + org.apache.maven.plugins maven-checkstyle-plugin @@ -790,10 +912,11 @@ true docker-build - 13 + 16 gdcc/dataverse:${app.image.tag} unstable + false gdcc/base:${base.image.tag} unstable gdcc/configbaker:${conf.image.tag} @@ -806,6 +929,7 @@ ${postgresql.server.version} ${solr.version} dataverse + ${app.skipDeploy} diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index 4af128955c9..3bcf134bc76 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -22,9 +22,9 @@ }, { "typeName": "alternativeTitle", - "multiple": false, + "multiple": true, "typeClass": "primitive", - "value": "Alternative Title" + "value": ["Alternative Title"] }, { "typeName": "alternativeURL", @@ -907,14 +907,14 @@ "typeClass": "primitive", "value": "-70" }, - "northLongitude": { - "typeName": "northLongitude", + "northLatitude": { + "typeName": "northLatitude", "multiple": false, "typeClass": "primitive", "value": "43" }, - "southLongitude": { - "typeName": "southLongitude", + "southLatitude": { + "typeName": "southLatitude", "multiple": false, "typeClass": "primitive", "value": "42" @@ -933,14 +933,14 @@ "typeClass": "primitive", "value": "-13" }, - "northLongitude": { - "typeName": "northLongitude", + "northLatitude": { + "typeName": "northLatitude", "multiple": false, "typeClass": "primitive", "value": "29" }, - "southLongitude": { - "typeName": "southLongitude", + "southLatitude": { + "typeName": "southLatitude", "multiple": false, "typeClass": "primitive", "value": "28" diff --git a/scripts/api/data/metadatablocks/astrophysics.tsv b/scripts/api/data/metadatablocks/astrophysics.tsv index 4039d32cb75..92792d404c9 100644 --- a/scripts/api/data/metadatablocks/astrophysics.tsv +++ b/scripts/api/data/metadatablocks/astrophysics.tsv @@ -2,13 +2,13 @@ astrophysics Astronomy and Astrophysics Metadata #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id astroType Type The nature or genre of the content of the files in the dataset. text 0 TRUE TRUE TRUE TRUE FALSE FALSE astrophysics - astroFacility Facility The observatory or facility where the data was obtained. text 1 TRUE TRUE TRUE TRUE FALSE FALSE astrophysics - astroInstrument Instrument The instrument used to collect the data. text 2 TRUE TRUE TRUE TRUE FALSE FALSE astrophysics + astroFacility Facility The observatory or facility where the data was obtained. 
text 1 TRUE FALSE TRUE TRUE FALSE FALSE astrophysics + astroInstrument Instrument The instrument used to collect the data. text 2 TRUE FALSE TRUE TRUE FALSE FALSE astrophysics astroObject Object Astronomical Objects represented in the data (Given as SIMBAD recognizable names preferred). text 3 TRUE FALSE TRUE TRUE FALSE FALSE astrophysics resolution.Spatial Spatial Resolution The spatial (angular) resolution that is typical of the observations, in decimal degrees. text 4 TRUE FALSE FALSE TRUE FALSE FALSE astrophysics resolution.Spectral Spectral Resolution The spectral resolution that is typical of the observations, given as the ratio \u03bb/\u0394\u03bb. text 5 TRUE FALSE FALSE TRUE FALSE FALSE astrophysics resolution.Temporal Time Resolution The temporal resolution that is typical of the observations, given in seconds. text 6 FALSE FALSE FALSE FALSE FALSE FALSE astrophysics - coverage.Spectral.Bandpass Bandpass Conventional bandpass name text 7 TRUE TRUE TRUE TRUE FALSE FALSE astrophysics + coverage.Spectral.Bandpass Bandpass Conventional bandpass name text 7 TRUE FALSE TRUE TRUE FALSE FALSE astrophysics coverage.Spectral.CentralWavelength Central Wavelength (m) The central wavelength of the spectral bandpass, in meters. Enter a floating-point number. float 8 TRUE FALSE TRUE TRUE FALSE FALSE astrophysics coverage.Spectral.Wavelength Wavelength Range The minimum and maximum wavelength of the spectral bandpass. Enter a floating-point number. none 9 FALSE FALSE TRUE FALSE FALSE FALSE astrophysics coverage.Spectral.MinimumWavelength Minimum (m) The minimum wavelength of the spectral bandpass, in meters. Enter a floating-point number. float 10 TRUE FALSE FALSE TRUE FALSE FALSE coverage.Spectral.Wavelength astrophysics diff --git a/scripts/api/data/metadatablocks/biomedical.tsv b/scripts/api/data/metadatablocks/biomedical.tsv index 28d59130c34..d70f754336a 100644 --- a/scripts/api/data/metadatablocks/biomedical.tsv +++ b/scripts/api/data/metadatablocks/biomedical.tsv @@ -13,7 +13,7 @@ studyAssayOtherTechnologyType Other Technology Type If Other was selected in Technology Type, list any other technology types that were used in this Dataset. text 9 TRUE FALSE TRUE TRUE FALSE FALSE biomedical studyAssayPlatform Technology Platform The manufacturer and name of the technology platform used in the assay (e.g. Bruker AVANCE). text 10 TRUE TRUE TRUE TRUE FALSE FALSE biomedical studyAssayOtherPlatform Other Technology Platform If Other was selected in Technology Platform, list any other technology platforms that were used in this Dataset. text 11 TRUE FALSE TRUE TRUE FALSE FALSE biomedical - studyAssayCellType Cell Type The name of the cell line from which the source or sample derives. text 12 TRUE TRUE TRUE TRUE FALSE FALSE biomedical + studyAssayCellType Cell Type The name of the cell line from which the source or sample derives. 
text 12 TRUE FALSE TRUE TRUE FALSE FALSE biomedical #controlledVocabulary DatasetField Value identifier displayOrder studyDesignType Case Control EFO_0001427 0 studyDesignType Cross Sectional EFO_0001428 1 diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 18bc31c2dd6..c5af05927dc 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -3,7 +3,7 @@ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI title Title The main title of the Dataset text 0 TRUE FALSE FALSE FALSE TRUE TRUE citation http://purl.org/dc/terms/title subtitle Subtitle A secondary title that amplifies or states certain limitations on the main title text 1 FALSE FALSE FALSE FALSE FALSE FALSE citation - alternativeTitle Alternative Title Either 1) a title commonly used to refer to the Dataset or 2) an abbreviation of the main title text 2 FALSE FALSE FALSE FALSE FALSE FALSE citation http://purl.org/dc/terms/alternative + alternativeTitle Alternative Title Either 1) a title commonly used to refer to the Dataset or 2) an abbreviation of the main title text 2 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/alternative alternativeURL Alternative URL Another URL where one can view or access the data in the Dataset, e.g. a project or personal webpage https:// url 3 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE citation https://schema.org/distribution otherId Other Identifier Another unique identifier for the Dataset (e.g. producer's or another repository's identifier) none 4 : FALSE FALSE TRUE FALSE FALSE FALSE citation otherIdAgency Agency The name of the agency that generated the other identifier text 5 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation @@ -70,7 +70,7 @@ seriesName Name The name of the dataset series text 66 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 67 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation software Software Information about the software used to generate the Dataset none 68 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy - softwareName Name The name of software used to generate the Dataset text 69 #VALUE FALSE TRUE FALSE FALSE FALSE FALSE software citation + softwareName Name The name of software used to generate the Dataset text 69 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation softwareVersion Version The version of the software used to generate the Dataset, e.g. 
4.11 text 70 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 71 FALSE FALSE TRUE FALSE FALSE FALSE citation relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation diff --git a/scripts/api/data/metadatablocks/geospatial.tsv b/scripts/api/data/metadatablocks/geospatial.tsv index a3a8e7efd58..09d19c608e5 100644 --- a/scripts/api/data/metadatablocks/geospatial.tsv +++ b/scripts/api/data/metadatablocks/geospatial.tsv @@ -8,10 +8,10 @@ otherGeographicCoverage Other Other information on the geographic coverage of the data. text 4 #VALUE, FALSE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial geographicUnit Geographic Unit Lowest level of geographic aggregation covered by the Dataset, e.g., village, county, region. text 5 TRUE FALSE TRUE TRUE FALSE FALSE geospatial geographicBoundingBox Geographic Bounding Box The fundamental geometric description for any Dataset that models geography is the geographic bounding box. It describes the minimum box, defined by west and east longitudes and north and south latitudes, which includes the largest geographic extent of the Dataset's geographic coverage. This element is used in the first pass of a coordinate-based search. Inclusion of this element in the codebook is recommended, but is required if the bound polygon box is included. none 6 FALSE FALSE TRUE FALSE FALSE FALSE geospatial - westLongitude West Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. text 7 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - eastLongitude East Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - northLongitude North Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - southLongitude South Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + westLongitude Westernmost (Left) Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. text 7 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + eastLongitude Easternmost (Right) Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. 
text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + northLatitude Northernmost (Top) Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + southLatitude Southernmost (Bottom) Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial #controlledVocabulary DatasetField Value identifier displayOrder country Afghanistan 0 country Albania 1 diff --git a/scripts/api/setup-all.sh b/scripts/api/setup-all.sh index e247caa72b5..5ddd9a35fdc 100755 --- a/scripts/api/setup-all.sh +++ b/scripts/api/setup-all.sh @@ -57,10 +57,6 @@ echo "- Allow internal signup" curl -X PUT -d yes "${DATAVERSE_URL}/api/admin/settings/:AllowSignUp" curl -X PUT -d "/dataverseuser.xhtml?editMode=CREATE" "${DATAVERSE_URL}/api/admin/settings/:SignUpUrl" -curl -X PUT -d doi "${DATAVERSE_URL}/api/admin/settings/:Protocol" -curl -X PUT -d 10.5072 "${DATAVERSE_URL}/api/admin/settings/:Authority" -curl -X PUT -d "FK2/" "${DATAVERSE_URL}/api/admin/settings/:Shoulder" -curl -X PUT -d DataCite "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" curl -X PUT -d burrito "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" curl -X PUT -d localhost-only "${DATAVERSE_URL}/api/admin/settings/:BlockedApiPolicy" curl -X PUT -d 'native/http' "${DATAVERSE_URL}/api/admin/settings/:UploadMethods" diff --git a/scripts/api/setup-optional-harvard.sh b/scripts/api/setup-optional-harvard.sh index fcbcc08a8e6..1311464e8ff 100755 --- a/scripts/api/setup-optional-harvard.sh +++ b/scripts/api/setup-optional-harvard.sh @@ -3,6 +3,7 @@ SERVER=http://localhost:8080/api echo "Setting up Harvard-specific settings" # :Authority and :Shoulder are commented out so this script can be used on test servers +# Should now use the new multipid JVM options instead of these settings #curl -X PUT -d 10.7910 "$SERVER/admin/settings/:Authority" #curl -X PUT -d "DVN/" "$SERVER/admin/settings/:Shoulder" echo "- Application Status header" diff --git a/scripts/deploy/phoenix.dataverse.org/post b/scripts/deploy/phoenix.dataverse.org/post index e4c8817844b..9d37c183a1a 100755 --- a/scripts/deploy/phoenix.dataverse.org/post +++ b/scripts/deploy/phoenix.dataverse.org/post @@ -4,7 +4,6 @@ cd scripts/api cd ../.. psql -U dvnapp dvndb -f scripts/database/reference_data.sql psql -U dvnapp dvndb -f doc/sphinx-guides/source/_static/util/createsequence.sql -curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE scripts/search/tests/publish-dataverse-root git checkout scripts/api/data/dv-root.json scripts/search/tests/grant-authusers-add-on-root diff --git a/scripts/dev/dev-rebuild.sh b/scripts/dev/dev-rebuild.sh index 9eae195b135..898212b4664 100755 --- a/scripts/dev/dev-rebuild.sh +++ b/scripts/dev/dev-rebuild.sh @@ -56,9 +56,6 @@ cd ../.. echo "Creating SQL sequence..." psql -h localhost -U $DB_USER $DB_NAME -f doc/sphinx-guides/source/_static/util/createsequence.sql -echo "Setting DOI provider to \"FAKE\"..." -curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE - echo "Allowing GUI edits to be visible without redeploy..." 
$PAYARA_DIR/glassfish/bin/asadmin create-system-properties "dataverse.jsf.refresh-period=1" diff --git a/scripts/dev/docker-final-setup.sh b/scripts/dev/docker-final-setup.sh index d2453619ec2..e20ce7ad6b6 100755 --- a/scripts/dev/docker-final-setup.sh +++ b/scripts/dev/docker-final-setup.sh @@ -10,9 +10,6 @@ cd ../.. echo "Setting system mail address..." curl -X PUT -d "dataverse@localhost" "http://localhost:8080/api/admin/settings/:SystemEmail" -echo "Setting DOI provider to \"FAKE\"..." -curl "http://localhost:8080/api/admin/settings/:DoiProvider" -X PUT -d FAKE - API_TOKEN=$(grep apiToken "/tmp/setup-all.sh.out" | jq ".data.apiToken" | tr -d \") export API_TOKEN diff --git a/scripts/installer/as-setup.sh b/scripts/installer/as-setup.sh index fc5b378cff5..c89bcb4ff4d 100755 --- a/scripts/installer/as-setup.sh +++ b/scripts/installer/as-setup.sh @@ -102,17 +102,18 @@ function preliminary_setup() # password reset token timeout in minutes ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.auth.password-reset-timeout-in-minutes=60" - # DataCite DOI Settings + # Fake DOI Settings # (we can no longer offer EZID with their shared test account) # jvm-options use colons as separators, escape as literal DOI_BASEURL_ESC=`echo $DOI_BASEURL | sed -e 's/:/\\\:/'` - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.username=${DOI_USERNAME}" - ./asadmin $ASADMIN_OPTS create-jvm-options '\-Ddataverse.pid.datacite.password=${ALIAS=doi_password_alias}' - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.mds-api-url=$DOI_BASEURL_ESC" - + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.providers=fake" + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.fake.type=FAKE" + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.fake.label=Fake DOI Provider" + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.fake.authority=10.5072" + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.fake.shoulder=FK2/" # jvm-options use colons as separators, escape as literal - DOI_DATACITERESTAPIURL_ESC=`echo $DOI_DATACITERESTAPIURL | sed -e 's/:/\\\:/'` - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.rest-api-url=$DOI_DATACITERESTAPIURL_ESC" + #DOI_DATACITERESTAPIURL_ESC=`echo $DOI_DATACITERESTAPIURL | sed -e 's/:/\\\:/'` + #./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.testDC.datacite.rest-api-url=$DOI_DATACITERESTAPIURL_ESC" ./asadmin $ASADMIN_OPTS create-jvm-options "-Ddataverse.timerServer=true" diff --git a/scripts/installer/install.py b/scripts/installer/install.py index 3aedbd8c6ad..99316efb83b 100644 --- a/scripts/installer/install.py +++ b/scripts/installer/install.py @@ -413,7 +413,7 @@ # 3e. set permissions: - conn_cmd = "GRANT CREATE PRIVILEGES on DATABASE "+pgDb+" to "+pgUser+";" + conn_cmd = "GRANT ALL PRIVILEGES on DATABASE "+pgDb+" to "+pgUser+";" try: cur.execute(conn_cmd) except: @@ -422,9 +422,13 @@ conn.close() if int(pg_major_version) >= 15: - conn_cmd = "GRANT ALL ON SCHEMA public TO "+pgUser+";" - print("PostgreSQL 15 or higher detected. Running " + conn_cmd) + admin_conn_string = "dbname='"+pgDb+"' user='postgres' password='"+pgAdminPassword+"' host='"+pgHost+"'" + conn = psycopg2.connect(admin_conn_string) + conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + cur = conn.cursor() + conn_cmd = "GRANT CREATE ON SCHEMA public TO "+pgUser+";" try: + print("PostgreSQL 15 or higher detected. 
Running " + conn_cmd) cur.execute(conn_cmd) except: if force: @@ -587,15 +591,14 @@ print("\n\nYou should now have a running Dataverse instance at") print(" http://" + hostName + ":8080\n\n") -# DataCite instructions: +# PID instructions: -print("\nYour Dataverse has been configured to use DataCite, to register DOI global identifiers in the ") +print("\nYour Dataverse has been configured to use a Fake DOI Provider, registering (non-resolvable) DOI global identifiers in the ") print("test name space \"10.5072\" with the \"shoulder\" \"FK2\"") -print("However, you have to contact DataCite (support\@datacite.org) and request a test account, before you ") -print("can publish datasets. Once you receive the account name and password, add them to your domain.xml,") -print("as the following two JVM options:") -print("\t-Ddataverse.pid.datacite.username=...") -print("\t-Ddataverse.pid.datacite.password=...") +print("You can reconfigure to use additional/alternative providers.") +print("If you intend to use DOIs, you should contact DataCite (support\@datacite.org) or GDCC (see https://www.gdcc.io/about.html) and request a test account.") +print("Once you receive the account information (name, password, authority, shoulder), add them to your configuration ") +print("as described in the Dataverse Guides (see https://guides.dataverse.org/en/latest/installation/config.html#persistent-identifiers-and-publishing-datasets),") print("and restart payara") print("If this is a production Dataverse and you are planning to register datasets as ") print("\"real\", non-test DOIs or Handles, consult the \"Persistent Identifiers and Publishing Datasets\"") diff --git a/src/main/docker/Dockerfile b/src/main/docker/Dockerfile index 88020a118b5..ed670294873 100644 --- a/src/main/docker/Dockerfile +++ b/src/main/docker/Dockerfile @@ -29,6 +29,11 @@ FROM $BASE_IMAGE # See also https://download.eclipse.org/microprofile/microprofile-config-3.0/microprofile-config-spec-3.0.html#configprofile ENV MP_CONFIG_PROFILE=ct +# Workaround to configure upload directories by default to useful place until we can have variable lookups in +# defaults for glassfish-web.xml and other places. +ENV DATAVERSE_FILES_UPLOADS="${STORAGE_DIR}/uploads" +ENV DATAVERSE_FILES_DOCROOT="${STORAGE_DIR}/docroot" + # Copy app and deps from assembly in proper layers COPY --chown=payara:payara maven/deps ${DEPLOY_DIR}/dataverse/WEB-INF/lib/ COPY --chown=payara:payara maven/app ${DEPLOY_DIR}/dataverse/ @@ -51,4 +56,6 @@ LABEL org.opencontainers.image.created="@git.build.time@" \ org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ org.opencontainers.image.licenses="Apache-2.0" \ org.opencontainers.image.title="Dataverse Application Image" \ - org.opencontainers.image.description="This container image provides the research data repository software Dataverse in a box." \ No newline at end of file + org.opencontainers.image.description="This container image provides the research data repository software Dataverse in a box." 
\ + org.dataverse.deps.postgresql.version="@postgresql.server.version@" \ + org.dataverse.deps.solr.version="@solr.version@" \ No newline at end of file diff --git a/src/main/docker/assembly.xml b/src/main/docker/assembly.xml index 9f9b39617a3..62cd910ef9b 100644 --- a/src/main/docker/assembly.xml +++ b/src/main/docker/assembly.xml @@ -3,7 +3,7 @@ - target/${project.artifactId}-${project.version} + target/${project.artifactId} app WEB-INF/lib/**/* @@ -11,7 +11,7 @@ - target/${project.artifactId}-${project.version}/WEB-INF/lib + target/${project.artifactId}/WEB-INF/lib deps diff --git a/src/main/docker/scripts/init_3_wait_dataverse_db_host.sh b/src/main/docker/scripts/init_3_wait_dataverse_db_host.sh new file mode 100644 index 00000000000..c234ad33307 --- /dev/null +++ b/src/main/docker/scripts/init_3_wait_dataverse_db_host.sh @@ -0,0 +1,4 @@ +#It was reported in #9949 that on the first launch of the containers Dataverse would not be deployed on Payara. +#This was caused by a race condition due to postgres not being ready. A solution for docker compose was prepared +#but didn't work due to a compatibility issue in the Maven plugin [https://github.com/fabric8io/docker-maven-plugin/issues/888] +wait-for "${DATAVERSE_DB_HOST:-postgres}:${DATAVERSE_DB_PORT:-5432}" -t 120 \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java deleted file mode 100644 index f1bfc3e290b..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java +++ /dev/null @@ -1,700 +0,0 @@ -package edu.harvard.iq.dataverse; - -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.SystemConfig; -import java.io.InputStream; -import jakarta.ejb.EJB; -import jakarta.inject.Inject; -import java.util.*; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.commons.lang3.RandomStringUtils; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; - -public abstract class AbstractGlobalIdServiceBean implements GlobalIdServiceBean { - - private static final Logger logger = Logger.getLogger(AbstractGlobalIdServiceBean.class.getCanonicalName()); - - @Inject - DataverseServiceBean dataverseService; - @EJB - protected - SettingsServiceBean settingsService; - @Inject - protected - DvObjectServiceBean dvObjectService; - @Inject - SystemConfig systemConfig; - - protected Boolean configured = null; - - public static String UNAVAILABLE = ":unav"; - - @Override - public Map getMetadataForCreateIndicator(DvObject dvObjectIn) { - logger.log(Level.FINE,"getMetadataForCreateIndicator(DvObject)"); - Map metadata = new HashMap<>(); - metadata = addBasicMetadata(dvObjectIn, metadata); - metadata.put("datacite.publicationyear", generateYear(dvObjectIn)); - metadata.put("_target", getTargetUrl(dvObjectIn)); - return metadata; - } - - protected Map getUpdateMetadata(DvObject dvObjectIn) { - logger.log(Level.FINE,"getUpdateMetadataFromDataset"); - Map metadata = new HashMap<>(); - metadata = addBasicMetadata(dvObjectIn, metadata); - return metadata; - } - - protected Map addBasicMetadata(DvObject dvObjectIn, Map metadata) { - - String authorString = dvObjectIn.getAuthorString(); - if (authorString.isEmpty() || authorString.contains(DatasetField.NA_VALUE)) { - authorString = UNAVAILABLE; - } - - String producerString = dataverseService.getRootDataverseName();
- - if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { - producerString = UNAVAILABLE; - } - - String titleString = dvObjectIn.getCurrentName(); - - if (titleString.isEmpty() || titleString.equals(DatasetField.NA_VALUE)) { - titleString = UNAVAILABLE; - } - - metadata.put("datacite.creator", authorString); - metadata.put("datacite.title", titleString); - metadata.put("datacite.publisher", producerString); - metadata.put("datacite.publicationyear", generateYear(dvObjectIn)); - return metadata; - } - - protected Map addDOIMetadataForDestroyedDataset(DvObject dvObjectIn) { - Map metadata = new HashMap<>(); - String authorString = UNAVAILABLE; - String producerString = UNAVAILABLE; - String titleString = "This item has been removed from publication"; - - metadata.put("datacite.creator", authorString); - metadata.put("datacite.title", titleString); - metadata.put("datacite.publisher", producerString); - metadata.put("datacite.publicationyear", "9999"); - return metadata; - } - - protected String getTargetUrl(DvObject dvObjectIn) { - logger.log(Level.FINE,"getTargetUrl"); - return systemConfig.getDataverseSiteUrl() + dvObjectIn.getTargetUrl() + dvObjectIn.getGlobalId().asString(); - } - - @Override - public String getIdentifier(DvObject dvObject) { - GlobalId gid = dvObject.getGlobalId(); - return gid != null ? gid.asString() : null; - } - - protected String generateYear (DvObject dvObjectIn){ - return dvObjectIn.getYearPublishedCreated(); - } - - public Map getMetadataForTargetURL(DvObject dvObject) { - logger.log(Level.FINE,"getMetadataForTargetURL"); - HashMap metadata = new HashMap<>(); - metadata.put("_target", getTargetUrl(dvObject)); - return metadata; - } - - @Override - public boolean alreadyRegistered(DvObject dvo) throws Exception { - if(dvo==null) { - logger.severe("Null DvObject sent to alreadyRegistered()."); - return false; - } - GlobalId globalId = dvo.getGlobalId(); - if(globalId == null) { - return false; - } - return alreadyRegistered(globalId, false); - } - - public abstract boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) throws Exception; - - /* - * ToDo: the DvObject being sent in provides partial support for the case where - * it has a different authority/protocol than what is configured (i.e. a legacy - * Pid that can actually be updated by the Pid account being used.) Removing - * this now would potentially break/make it harder to handle that case prior to - * support for configuring multiple Pid providers. Once that exists, it would be - * cleaner to always find the PidProvider associated with the - * protocol/authority/shoulder of the current dataset and then not pass the - * DvObject as a param. (This would also remove calls to get the settings since - * that would be done at construction.) - */ - @Override - public DvObject generateIdentifier(DvObject dvObject) { - - String protocol = dvObject.getProtocol() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : dvObject.getProtocol(); - String authority = dvObject.getAuthority() == null ? 
settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : dvObject.getAuthority(); - if (dvObject.isInstanceofDataset()) { - dvObject.setIdentifier(generateDatasetIdentifier((Dataset) dvObject)); - } else { - dvObject.setIdentifier(generateDataFileIdentifier((DataFile) dvObject)); - } - if (dvObject.getProtocol() == null) { - dvObject.setProtocol(protocol); - } - if (dvObject.getAuthority() == null) { - dvObject.setAuthority(authority); - } - return dvObject; - } - - //ToDo just send the DvObject.DType - public String generateDatasetIdentifier(Dataset dataset) { - //ToDo - track these in the bean - String identifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); - String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, ""); - - switch (identifierType) { - case "randomString": - return generateIdentifierAsRandomString(dataset, shoulder); - case "storedProcGenerated": - return generateIdentifierFromStoredProcedureIndependent(dataset, shoulder); - default: - /* Should we throw an exception instead?? -- L.A. 4.6.2 */ - return generateIdentifierAsRandomString(dataset, shoulder); - } - } - - - /** - * Check that a identifier entered by the user is unique (not currently used - * for any other study in this Dataverse Network) also check for duplicate - * in EZID if needed - * @param userIdentifier - * @param dataset - * @return {@code true} if the identifier is unique, {@code false} otherwise. - */ - public boolean isGlobalIdUnique(GlobalId globalId) { - if ( ! dvObjectService.isGlobalIdLocallyUnique(globalId) ) { - return false; // duplication found in local database - } - - // not in local DB, look in the persistent identifier service - try { - return ! alreadyRegistered(globalId, false); - } catch (Exception e){ - //we can live with failure - means identifier not found remotely - } - - return true; - } - - /** - * Parse a Persistent Id and set the protocol, authority, and identifier - * - * Example 1: doi:10.5072/FK2/BYM3IW - * protocol: doi - * authority: 10.5072 - * identifier: FK2/BYM3IW - * - * Example 2: hdl:1902.1/111012 - * protocol: hdl - * authority: 1902.1 - * identifier: 111012 - * - * @param identifierString - * @param separator the string that separates the authority from the identifier. - * @param destination the global id that will contain the parsed data. - * @return {@code destination}, after its fields have been updated, or - * {@code null} if parsing failed. 
- */ - @Override - public GlobalId parsePersistentId(String fullIdentifierString) { - if(!isConfigured()) { - return null; - } - // Occasionally, the protocol separator character ':' comes in still - // URL-encoded as %3A (usually as a result of the URL having been - // encoded twice): - fullIdentifierString = fullIdentifierString.replace("%3A", ":"); - - int index1 = fullIdentifierString.indexOf(':'); - if (index1 > 0) { // ':' found with one or more characters before it - String protocol = fullIdentifierString.substring(0, index1); - GlobalId globalId = parsePersistentId(protocol, fullIdentifierString.substring(index1+1)); - return globalId; - } - logger.log(Level.INFO, "Error parsing identifier: {0}: '':'' not found in string", fullIdentifierString); - return null; - } - - protected GlobalId parsePersistentId(String protocol, String identifierString) { - if(!isConfigured()) { - return null; - } - String authority; - String identifier; - if (identifierString == null) { - return null; - } - int index = identifierString.indexOf('/'); - if (index > 0 && (index + 1) < identifierString.length()) { - // '/' found with one or more characters - // before and after it - // Strip any whitespace, ; and ' from authority (should finding them cause a - // failure instead?) - authority = GlobalIdServiceBean.formatIdentifierString(identifierString.substring(0, index)); - if (GlobalIdServiceBean.testforNullTerminator(authority)) { - return null; - } - identifier = GlobalIdServiceBean.formatIdentifierString(identifierString.substring(index + 1)); - if (GlobalIdServiceBean.testforNullTerminator(identifier)) { - return null; - } - } else { - logger.log(Level.INFO, "Error parsing identifier: {0}: '':/'' not found in string", - identifierString); - return null; - } - return parsePersistentId(protocol, authority, identifier); - } - - public GlobalId parsePersistentId(String protocol, String authority, String identifier) { - if(!isConfigured()) { - return null; - } - logger.fine("Parsing: " + protocol + ":" + authority + getSeparator() + identifier + " in " + getProviderInformation().get(0)); - if(!GlobalIdServiceBean.isValidGlobalId(protocol, authority, identifier)) { - return null; - } - return new GlobalId(protocol, authority, identifier, getSeparator(), getUrlPrefix(), - getProviderInformation().get(0)); - } - - - public String getSeparator() { - //The standard default - return "/"; - } - - @Override - public String generateDataFileIdentifier(DataFile datafile) { - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); - String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); - - String prepend = ""; - if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.DEPENDENT.toString())){ - //If format is dependent then pre-pend the dataset identifier - prepend = datafile.getOwner().getIdentifier() + "/"; - datafile.setProtocol(datafile.getOwner().getProtocol()); - datafile.setAuthority(datafile.getOwner().getAuthority()); - } else { - //If there's a shoulder prepend independent identifiers with it - prepend = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, ""); - datafile.setProtocol(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol)); - datafile.setAuthority(settingsService.getValueForKey(SettingsServiceBean.Key.Authority)); - } - - switch (doiIdentifierType) { - case "randomString": - return 
generateIdentifierAsRandomString(datafile, prepend); - case "storedProcGenerated": - if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())){ - return generateIdentifierFromStoredProcedureIndependent(datafile, prepend); - } else { - return generateIdentifierFromStoredProcedureDependent(datafile, prepend); - } - default: - /* Should we throw an exception instead?? -- L.A. 4.6.2 */ - return generateIdentifierAsRandomString(datafile, prepend); - } - } - - - /* - * This method checks locally for a DvObject with the same PID and if that is OK, checks with the PID service. - * @param dvo - the object to check (ToDo - get protocol/authority from this PidProvider object) - * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it could be the shoulder or the parent Dataset identifier - */ - private String generateIdentifierAsRandomString(DvObject dvo, String prepend) { - String identifier = null; - do { - identifier = prepend + RandomStringUtils.randomAlphanumeric(6).toUpperCase(); - } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0)))); - - return identifier; - } - - /* - * This method checks locally for a DvObject with the same PID and if that is OK, checks with the PID service. - * @param dvo - the object to check (ToDo - get protocol/authority from this PidProvider object) - * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it could be the shoulder or the parent Dataset identifier - */ - - private String generateIdentifierFromStoredProcedureIndependent(DvObject dvo, String prepend) { - String identifier; - do { - String identifierFromStoredProcedure = dvObjectService.generateNewIdentifierByStoredProcedure(); - // some diagnostics here maybe - is it possible to determine that it's failing - // because the stored procedure hasn't been created in the database? - if (identifierFromStoredProcedure == null) { - return null; - } - identifier = prepend + identifierFromStoredProcedure; - } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0)))); - - return identifier; - } - - /*This method is only used for DataFiles with DEPENDENT Pids. It is not for Datasets - * - */ - private String generateIdentifierFromStoredProcedureDependent(DataFile datafile, String prepend) { - String identifier; - Long retVal; - retVal = Long.valueOf(0L); - //ToDo - replace loops with one lookup for largest entry? (the do loop runs ~n**2/2 calls). The check for existingIdentifiers means this is mostly a local loop now, versus involving db or PidProvider calls, but still...) - - // This will catch identifiers already assigned in the current transaction (e.g. 
- // in FinalizeDatasetPublicationCommand) that haven't been committed to the db - // without having to make a call to the PIDProvider - Set existingIdentifiers = new HashSet(); - List files = datafile.getOwner().getFiles(); - for(DataFile f:files) { - existingIdentifiers.add(f.getIdentifier()); - } - - do { - retVal++; - identifier = prepend + retVal.toString(); - - } while (existingIdentifiers.contains(identifier) || !isGlobalIdUnique(new GlobalId(datafile.getProtocol(), datafile.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0)))); - - return identifier; - } - - - class GlobalIdMetadataTemplate { - - - private String template; - - public GlobalIdMetadataTemplate(){ - try (InputStream in = GlobalIdMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) { - template = Util.readAndClose(in, "utf-8"); - } catch (Exception e) { - logger.log(Level.SEVERE, "datacite metadata template load error"); - logger.log(Level.SEVERE, "String " + e.toString()); - logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); - logger.log(Level.SEVERE, "cause " + e.getCause()); - logger.log(Level.SEVERE, "message " + e.getMessage()); - } - } - - private String xmlMetadata; - private String identifier; - private List datafileIdentifiers; - private List creators; - private String title; - private String publisher; - private String publisherYear; - private List authors; - private String description; - private List contacts; - private List producers; - - public List getProducers() { - return producers; - } - - public void setProducers(List producers) { - this.producers = producers; - } - - public List getContacts() { - return contacts; - } - - public void setContacts(List contacts) { - this.contacts = contacts; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public List getAuthors() { - return authors; - } - - public void setAuthors(List authors) { - this.authors = authors; - } - - - public List getDatafileIdentifiers() { - return datafileIdentifiers; - } - - public void setDatafileIdentifiers(List datafileIdentifiers) { - this.datafileIdentifiers = datafileIdentifiers; - } - - public GlobalIdMetadataTemplate(String xmlMetaData) { - this.xmlMetadata = xmlMetaData; - Document doc = Jsoup.parseBodyFragment(xmlMetaData); - Elements identifierElements = doc.select("identifier"); - if (identifierElements.size() > 0) { - identifier = identifierElements.get(0).html(); - } - Elements creatorElements = doc.select("creatorName"); - creators = new ArrayList<>(); - for (Element creatorElement : creatorElements) { - creators.add(creatorElement.html()); - } - Elements titleElements = doc.select("title"); - if (titleElements.size() > 0) { - title = titleElements.get(0).html(); - } - Elements publisherElements = doc.select("publisher"); - if (publisherElements.size() > 0) { - publisher = publisherElements.get(0).html(); - } - Elements publisherYearElements = doc.select("publicationYear"); - if (publisherYearElements.size() > 0) { - publisherYear = publisherYearElements.get(0).html(); - } - } - - public String generateXML(DvObject dvObject) { - // Can't use "UNKNOWN" here because DataCite will respond with "[facet 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" - String publisherYearFinal = "9999"; - // FIXME: Investigate why this.publisherYear is sometimes null now that pull request #4606 has been 
merged. - if (this.publisherYear != null) { - // Added to prevent a NullPointerException when trying to destroy datasets when using DataCite rather than EZID. - publisherYearFinal = this.publisherYear; - } - xmlMetadata = template.replace("${identifier}", getIdentifier().trim()) - .replace("${title}", this.title) - .replace("${publisher}", this.publisher) - .replace("${publisherYear}", publisherYearFinal) - .replace("${description}", this.description); - StringBuilder creatorsElement = new StringBuilder(); - for (DatasetAuthor author : authors) { - creatorsElement.append(""); - creatorsElement.append(author.getName().getDisplayValue()); - creatorsElement.append(""); - - if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() && !author.getIdValue().isEmpty() && author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - - if (author.getIdType().equals("ORCID")) { - creatorsElement.append("" + author.getIdValue() + ""); - } - if (author.getIdType().equals("ISNI")) { - creatorsElement.append("" + author.getIdValue() + ""); - } - if (author.getIdType().equals("LCNA")) { - creatorsElement.append("" + author.getIdValue() + ""); - } - } - if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - creatorsElement.append("" + author.getAffiliation().getDisplayValue() + ""); - } - creatorsElement.append(""); - } - xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString()); - - StringBuilder contributorsElement = new StringBuilder(); - for (String[] contact : this.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("" + contact[0] + ""); - if (!contact[1].isEmpty()) { - contributorsElement.append("" + contact[1] + ""); - } - contributorsElement.append(""); - } - } - for (String[] producer : this.getProducers()) { - contributorsElement.append("" + producer[0] + ""); - if (!producer[1].isEmpty()) { - contributorsElement.append("" + producer[1] + ""); - } - contributorsElement.append(""); - } - - String relIdentifiers = generateRelatedIdentifiers(dvObject); - - xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); - - xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); - return xmlMetadata; - } - - private String generateRelatedIdentifiers(DvObject dvObject) { - - StringBuilder sb = new StringBuilder(); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - if (!dataFile.getGlobalId().asString().isEmpty()) { - if (sb.toString().isEmpty()) { - sb.append(""); - } - sb.append("" + dataFile.getGlobalId() + ""); - } - } - - if (!sb.toString().isEmpty()) { - sb.append(""); - } - } - } else if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - sb.append(""); - sb.append("" + df.getOwner().getGlobalId() + ""); - sb.append(""); - } - return sb.toString(); - } - - public void generateFileIdentifiers(DvObject dvObject) { - - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - datafileIdentifiers.add(dataFile.getIdentifier()); - int x = 
xmlMetadata.indexOf("") - 1; - xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier()); - xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" + template.substring(x, template.length() - 1); - - } - - } else { - xmlMetadata = xmlMetadata.replace("${relatedIdentifier}", ""); - } - } - } - - public String getTemplate() { - return template; - } - - public void setTemplate(String templateIn) { - template = templateIn; - } - - public String getIdentifier() { - return identifier; - } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } - - public List getCreators() { - return creators; - } - - public void setCreators(List creators) { - this.creators = creators; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getPublisher() { - return publisher; - } - - public void setPublisher(String publisher) { - this.publisher = publisher; - } - - public String getPublisherYear() { - return publisherYear; - } - - public void setPublisherYear(String publisherYear) { - this.publisherYear = publisherYear; - } -} - public String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { - - Dataset dataset = null; - - if (dvObject instanceof Dataset) { - dataset = (Dataset) dvObject; - } else { - dataset = (Dataset) dvObject.getOwner(); - } - - GlobalIdMetadataTemplate metadataTemplate = new GlobalIdMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Util.getListFromStr(metadata.get("datacite.creator"))); - metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); - if (dvObject.isInstanceofDataset()) { - metadataTemplate.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); - } - if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - String fileDescription = df.getDescription(); - metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription); - } - - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); - metadataTemplate.setTitle(dvObject.getCurrentName()); - String producerString = dataverseService.getRootDataverseName(); - if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE) ) { - producerString = UNAVAILABLE; - } - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); - - String xmlMetadata = metadataTemplate.generateXML(dvObject); - logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); - return xmlMetadata; - } - - @Override - public boolean canManagePID() { - //The default expectation is that PID providers are configured to manage some set (i.e. 
based on protocol/authority/shoulder) of PIDs - return true; - } - - @Override - public boolean isConfigured() { - if(configured==null) { - return false; - } else { - return configured.booleanValue(); - } - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 8c96f98ce39..363622ba3bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -2,6 +2,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -46,6 +47,8 @@ public class AuxiliaryFileServiceBean implements java.io.Serializable { @EJB private SystemConfig systemConfig; + @EJB + StorageUseServiceBean storageUseService; public AuxiliaryFile find(Object pk) { return em.find(AuxiliaryFile.class, pk); @@ -126,6 +129,13 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile } dataFile.getAuxiliaryFiles().add(auxFile); } + // We've just added this file to storage; increment the StorageUse + // record if needed. + if (auxFile.getFileSize() != null + && auxFile.getFileSize() > 0 + && dataFile.getOwner() != null ) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), auxFile.getFileSize()); + } } catch (IOException ioex) { logger.severe("IO Exception trying to save auxiliary file: " + ioex.getMessage()); throw new InternalServerErrorException(); @@ -181,6 +191,7 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form if (af == null) { throw new FileNotFoundException(); } + Long auxFileSize = af.getFileSize(); em.remove(af); StorageIO storageIO; storageIO = dataFile.getStorageIO(); @@ -188,6 +199,14 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form if (storageIO.isAuxObjectCached(auxExtension)) { storageIO.deleteAuxObject(auxExtension); } + // We've just deleted this file from storage; update the StorageUse + // record if needed. + if (auxFileSize != null + && auxFileSize > 0 + && dataFile.getOwner() != null) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), (0L - auxFileSize)); + } + } public List findAuxiliaryFiles(DataFile dataFile) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterCache.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterCache.java deleted file mode 100644 index 7c75b1a4da6..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterCache.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
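Editor's note on the AuxiliaryFileServiceBean hunk above (before the deleted DataCite classes that follow): the new StorageUse bookkeeping is deliberately symmetric, the same service call is made with a positive delta after a successful save and a negative delta (0L - size) after a successful delete. A minimal sketch of that pattern, assuming the injected storageUseService field shown in the hunk; the helper name is hypothetical.

    // Hypothetical helper illustrating the pattern in the hunk above:
    // pass +size after a save, (0L - size) after a delete.
    private void adjustStorageUse(DataFile dataFile, Long sizeDelta) {
        if (sizeDelta != null && sizeDelta != 0L && dataFile.getOwner() != null) {
            storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), sizeDelta);
        }
    }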
- */ -package edu.harvard.iq.dataverse; - - -import java.io.Serializable; -import jakarta.persistence.Column; -import jakarta.persistence.Entity; -import jakarta.persistence.GeneratedValue; -import jakarta.persistence.GenerationType; -import jakarta.persistence.Id; -import jakarta.persistence.Lob; -import jakarta.persistence.NamedQueries; -import jakarta.persistence.NamedQuery; -import org.hibernate.validator.constraints.NotBlank; - -/** - * - * @author luopc - */ -@NamedQueries( - @NamedQuery( name="DOIDataCiteRegisterCache.findByDoi", - query="SELECT d FROM DOIDataCiteRegisterCache d WHERE d.doi=:doi") -) -@Entity -public class DOIDataCiteRegisterCache implements Serializable{ - - private static final long serialVersionUID = 8030143094734315681L; - - @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - private Long id; - - @NotBlank - @Column(unique=true) - private String doi; - - @NotBlank - private String url; - - @NotBlank - private String status; - - @NotBlank - @Lob - private String xml; - - public Long getId() { - return id; - } - - public void setId(Long id) { - this.id = id; - } - - public String getDoi() { - return doi; - } - - public void setDoi(String doi) { - this.doi = doi; - } - - public String getStatus() { - return status; - } - - public void setStatus(String status) { - this.status = status; - } - - public String getXml() { - return xml; - } - - public void setXml(String xml) { - this.xml = xml; - } - - public String getUrl() { - return url; - } - - public void setUrl(String url) { - this.url = url; - } -} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java deleted file mode 100644 index 9ecc4a3ecc9..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ /dev/null @@ -1,707 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package edu.harvard.iq.dataverse; - -import edu.harvard.iq.dataverse.branding.BrandingUtil; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.logging.Level; -import java.util.logging.Logger; -import jakarta.ejb.EJB; -import jakarta.ejb.Stateless; -import jakarta.persistence.EntityManager; -import jakarta.persistence.PersistenceContext; -import jakarta.persistence.TypedQuery; - -import edu.harvard.iq.dataverse.settings.JvmSettings; -import org.apache.commons.text.StringEscapeUtils; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; - -/** - * - * @author luopc - */ -@Stateless -public class DOIDataCiteRegisterService { - - private static final Logger logger = Logger.getLogger(DOIDataCiteRegisterService.class.getCanonicalName()); - - @PersistenceContext(unitName = "VDCNet-ejbPU") - private EntityManager em; - - @EJB - DataverseServiceBean dataverseService; - - @EJB - DOIDataCiteServiceBean doiDataCiteServiceBean; - - - //A singleton since it, and the httpClient in it can be reused. 
- private DataCiteRESTfullClient client=null; - - private DataCiteRESTfullClient getClient() throws IOException { - if (client == null) { - client = new DataCiteRESTfullClient( - JvmSettings.DATACITE_MDS_API_URL.lookup(), - JvmSettings.DATACITE_USERNAME.lookup(), - JvmSettings.DATACITE_PASSWORD.lookup() - ); - } - return client; - } - - /** - * This method is deprecated and unused. We switched away from this method - * when adjusting the code to reserve DOIs from DataCite on dataset create. - * - * Note that the DOIDataCiteRegisterCache entity/table used in this method - * might be a candidate for deprecation as well. Removing it would require - * some refactoring as it is used throughout the DataCite code. - */ - @Deprecated - public String createIdentifierLocal(String identifier, Map metadata, DvObject dvObject) { - - String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); - String status = metadata.get("_status").trim(); - String target = metadata.get("_target"); - String retString = ""; - DOIDataCiteRegisterCache rc = findByDOI(identifier); - if (rc == null) { - rc = new DOIDataCiteRegisterCache(); - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("reserved"); - rc.setUrl(target); - em.persist(rc); - } else { - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("reserved"); - rc.setUrl(target); - } - retString = "success to reserved " + identifier; - - return retString; - } - - /** - * This "reserveIdentifier" method is heavily based on the - * "registerIdentifier" method below but doesn't, this one doesn't doesn't - * register a URL, which causes the "state" of DOI to transition from - * "draft" to "findable". Here are some DataCite docs on the matter: - * - * "DOIs can exist in three states: draft, registered, and findable. DOIs - * are in the draft state when metadata have been registered, and will - * transition to the findable state when registering a URL." -- - * https://support.datacite.org/docs/mds-api-guide#doi-states - */ - public String reserveIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { - String retString = ""; - String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); - DOIDataCiteRegisterCache rc = findByDOI(identifier); - String target = metadata.get("_target"); - if (rc != null) { - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - // DataCite uses the term "draft" instead of "reserved". 
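Editor's note: the draft-versus-findable distinction described in the comment above comes down to whether a target URL is registered alongside the metadata. A sketch of the two call patterns, assuming the DataCiteRESTfullClient methods used elsewhere in this deleted service (postMetadata and postUrl); the wrapper method names are hypothetical, and the DOI passed to postUrl is the bare "authority/identifier" without the "doi:" prefix.

    // Reserve only: metadata is posted, the DOI stays in the "draft" state.
    String reserveOnly(DataCiteRESTfullClient client, String xmlMetadata) throws IOException {
        return client.postMetadata(xmlMetadata);
    }

    // Publicize: posting a target URL as well moves the DOI from "draft" to "findable".
    String makeFindable(DataCiteRESTfullClient client, String bareDoi, String xmlMetadata, String targetUrl)
            throws IOException {
        String result = client.postMetadata(xmlMetadata);
        client.postUrl(bareDoi, targetUrl);
        return result;
    }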
- rc.setStatus("reserved"); - if (target == null || target.trim().length() == 0) { - target = rc.getUrl(); - } else { - rc.setUrl(target); - } - } - - DataCiteRESTfullClient client = getClient(); - retString = client.postMetadata(xmlMetadata); - - return retString; - } - - public String registerIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { - String retString = ""; - String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); - DOIDataCiteRegisterCache rc = findByDOI(identifier); - String target = metadata.get("_target"); - if (rc != null) { - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("public"); - if (target == null || target.trim().length() == 0) { - target = rc.getUrl(); - } else { - rc.setUrl(target); - } - } - - DataCiteRESTfullClient client = getClient(); - retString = client.postMetadata(xmlMetadata); - client.postUrl(identifier.substring(identifier.indexOf(":") + 1), target); - - return retString; - } - - public String deactivateIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { - String retString = ""; - - String metadataString = getMetadataForDeactivateIdentifier(identifier, metadata, dvObject); - retString = client.postMetadata(metadataString); - retString = client.inactiveDataset(identifier.substring(identifier.indexOf(":") + 1)); - - return retString; - } - - public static String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { - - Dataset dataset = null; - - if (dvObject instanceof Dataset) { - dataset = (Dataset) dvObject; - } else { - dataset = (Dataset) dvObject.getOwner(); - } - - DataCiteMetadataTemplate metadataTemplate = new DataCiteMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Util.getListFromStr(metadata.get("datacite.creator"))); - metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); - if (dvObject.isInstanceofDataset()) { - //While getDescriptionPlainText strips < and > from HTML, it leaves '&' (at least so we need to xml escape as well - String description = StringEscapeUtils.escapeXml10(dataset.getLatestVersion().getDescriptionPlainText()); - if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) { - description = AbstractGlobalIdServiceBean.UNAVAILABLE; - } - metadataTemplate.setDescription(description); - } - if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - //Note: File metadata is not escaped like dataset metadata is, so adding an xml escape here. - //This could/should be removed if the datafile methods add escaping - String fileDescription = StringEscapeUtils.escapeXml10(df.getDescription()); - metadataTemplate.setDescription(fileDescription == null ? AbstractGlobalIdServiceBean.UNAVAILABLE : fileDescription); - String datasetPid = df.getOwner().getGlobalId().asString(); - metadataTemplate.setDatasetIdentifier(datasetPid); - } else { - metadataTemplate.setDatasetIdentifier(""); - } - - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); - String title = dvObject.getCurrentName(); - if(dvObject.isInstanceofDataFile()) { - //Note file title is not currently escaped the way the dataset title is, so adding it here. 
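Editor's note: a small, self-contained illustration of the escaping discussed in the two comments above. getDescriptionPlainText strips tags but leaves '&', and file descriptions and titles are not pre-escaped, so escapeXml10 is applied before the values are substituted into the DataCite XML template. The sample string is made up.

    import org.apache.commons.text.StringEscapeUtils;

    public class EscapeXmlExample {
        public static void main(String[] args) {
            String description = "Salt & light, <i>unescaped</i>";
            // prints: Salt &amp; light, &lt;i&gt;unescaped&lt;/i&gt;
            System.out.println(StringEscapeUtils.escapeXml10(description));
        }
    }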
- title = StringEscapeUtils.escapeXml10(title); - } - - if (title.isEmpty() || title.equals(DatasetField.NA_VALUE)) { - title = AbstractGlobalIdServiceBean.UNAVAILABLE; - } - - metadataTemplate.setTitle(title); - String producerString = BrandingUtil.getRootDataverseCollectionName(); - if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { - producerString = AbstractGlobalIdServiceBean.UNAVAILABLE; - } - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); - - String xmlMetadata = metadataTemplate.generateXML(dvObject); - logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); - return xmlMetadata; - } - - public static String getMetadataForDeactivateIdentifier(String identifier, Map metadata, DvObject dvObject) { - - DataCiteMetadataTemplate metadataTemplate = new DataCiteMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Util.getListFromStr(metadata.get("datacite.creator"))); - - metadataTemplate.setDescription(AbstractGlobalIdServiceBean.UNAVAILABLE); - - String title =metadata.get("datacite.title"); - - System.out.print("Map metadata title: "+ metadata.get("datacite.title")); - - metadataTemplate.setAuthors(null); - - metadataTemplate.setTitle(title); - String producerString = AbstractGlobalIdServiceBean.UNAVAILABLE; - - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); - - String xmlMetadata = metadataTemplate.generateXML(dvObject); - logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); - return xmlMetadata; - } - - public String modifyIdentifier(String identifier, HashMap metadata, DvObject dvObject) throws IOException { - - String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); - - logger.fine("XML to send to DataCite: " + xmlMetadata); - - String status = metadata.get("_status").trim(); - String target = metadata.get("_target"); - String retString = ""; - if (status.equals("reserved")) { - DOIDataCiteRegisterCache rc = findByDOI(identifier); - if (rc == null) { - rc = new DOIDataCiteRegisterCache(); - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("reserved"); - rc.setUrl(target); - em.persist(rc); - } else { - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("reserved"); - rc.setUrl(target); - } - retString = "success to reserved " + identifier; - } else if (status.equals("public")) { - DOIDataCiteRegisterCache rc = findByDOI(identifier); - if (rc != null) { - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("public"); - if (target == null || target.trim().length() == 0) { - target = rc.getUrl(); - } else { - rc.setUrl(target); - } - try { - DataCiteRESTfullClient client = getClient(); - retString = client.postMetadata(xmlMetadata); - client.postUrl(identifier.substring(identifier.indexOf(":") + 1), target); - - } catch (UnsupportedEncodingException ex) { - logger.log(Level.SEVERE, null, ex); - - } catch (RuntimeException rte) { - logger.log(Level.SEVERE, "Error creating DOI at DataCite: {0}", rte.getMessage()); - logger.log(Level.SEVERE, "Exception", rte); - - } - } - } else if (status.equals("unavailable")) { - DOIDataCiteRegisterCache rc = findByDOI(identifier); - try { - DataCiteRESTfullClient client = getClient(); - if (rc != null) { - rc.setStatus("unavailable"); - retString = 
client.inactiveDataset(identifier.substring(identifier.indexOf(":") + 1)); - } - } catch (IOException io) { - - } - } - return retString; - } - - public boolean testDOIExists(String identifier) { - boolean doiExists; - try { - DataCiteRESTfullClient client = getClient(); - doiExists = client.testDOIExists(identifier.substring(identifier.indexOf(":") + 1)); - } catch (Exception e) { - logger.log(Level.INFO, identifier, e); - return false; - } - return doiExists; - } - - public HashMap getMetadata(String identifier) throws IOException { - HashMap metadata = new HashMap<>(); - try { - DataCiteRESTfullClient client = getClient(); - String xmlMetadata = client.getMetadata(identifier.substring(identifier.indexOf(":") + 1)); - DOIDataCiteServiceBean.GlobalIdMetadataTemplate template = doiDataCiteServiceBean.new GlobalIdMetadataTemplate(xmlMetadata); - metadata.put("datacite.creator", Util.getStrFromList(template.getCreators())); - metadata.put("datacite.title", template.getTitle()); - metadata.put("datacite.publisher", template.getPublisher()); - metadata.put("datacite.publicationyear", template.getPublisherYear()); - DOIDataCiteRegisterCache rc = findByDOI(identifier); - if (rc != null) { - metadata.put("_status", rc.getStatus()); - } else { - metadata.put("_status", "public"); - } - } catch (RuntimeException e) { - logger.log(Level.INFO, identifier, e); - } - return metadata; - } - - public DOIDataCiteRegisterCache findByDOI(String doi) { - TypedQuery query = em.createNamedQuery("DOIDataCiteRegisterCache.findByDoi", - DOIDataCiteRegisterCache.class); - query.setParameter("doi", doi); - List rc = query.getResultList(); - if (rc.size() == 1) { - return rc.get(0); - } - return null; - } - - public void deleteIdentifier(String identifier) { - DOIDataCiteRegisterCache rc = findByDOI(identifier); - if (rc != null) { - em.remove(rc); - } - } - -} - -class DataCiteMetadataTemplate { - - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.DataCiteMetadataTemplate"); - private static String template; - - static { - try (InputStream in = DataCiteMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) { - template = Util.readAndClose(in, "utf-8"); - } catch (Exception e) { - logger.log(Level.SEVERE, "datacite metadata template load error"); - logger.log(Level.SEVERE, "String " + e.toString()); - logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); - logger.log(Level.SEVERE, "cause " + e.getCause()); - logger.log(Level.SEVERE, "message " + e.getMessage()); - } - } - - private String xmlMetadata; - private String identifier; - private String datasetIdentifier; - private List datafileIdentifiers; - private List creators; - private String title; - private String publisher; - private String publisherYear; - private List authors; - private String description; - private List contacts; - private List producers; - - public List getProducers() { - return producers; - } - - public void setProducers(List producers) { - this.producers = producers; - } - - public List getContacts() { - return contacts; - } - - public void setContacts(List contacts) { - this.contacts = contacts; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public List getAuthors() { - return authors; - } - - public void setAuthors(List authors) { - this.authors = authors; - } - - public DataCiteMetadataTemplate() { - } - - public List getDatafileIdentifiers() { - return 
datafileIdentifiers; - } - - public void setDatafileIdentifiers(List datafileIdentifiers) { - this.datafileIdentifiers = datafileIdentifiers; - } - - public DataCiteMetadataTemplate(String xmlMetaData) { - this.xmlMetadata = xmlMetaData; - Document doc = Jsoup.parseBodyFragment(xmlMetaData); - Elements identifierElements = doc.select("identifier"); - if (identifierElements.size() > 0) { - identifier = identifierElements.get(0).html(); - } - Elements creatorElements = doc.select("creatorName"); - creators = new ArrayList<>(); - for (Element creatorElement : creatorElements) { - creators.add(creatorElement.html()); - } - Elements titleElements = doc.select("title"); - if (titleElements.size() > 0) { - title = titleElements.get(0).html(); - } - Elements publisherElements = doc.select("publisher"); - if (publisherElements.size() > 0) { - publisher = publisherElements.get(0).html(); - } - Elements publisherYearElements = doc.select("publicationYear"); - if (publisherYearElements.size() > 0) { - publisherYear = publisherYearElements.get(0).html(); - } - } - - public String generateXML(DvObject dvObject) { - // Can't use "UNKNOWN" here because DataCite will respond with "[facet 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" - String publisherYearFinal = "9999"; - // FIXME: Investigate why this.publisherYear is sometimes null now that pull request #4606 has been merged. - if (this.publisherYear != null) { - // Added to prevent a NullPointerException when trying to destroy datasets when using DataCite rather than EZID. - publisherYearFinal = this.publisherYear; - } - xmlMetadata = template.replace("${identifier}", this.identifier.trim()) - .replace("${title}", this.title) - .replace("${publisher}", this.publisher) - .replace("${publisherYear}", publisherYearFinal) - .replace("${description}", this.description); - - StringBuilder creatorsElement = new StringBuilder(); - if (authors!= null && !authors.isEmpty()) { - for (DatasetAuthor author : authors) { - creatorsElement.append(""); - creatorsElement.append(author.getName().getDisplayValue()); - creatorsElement.append(""); - - if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() && !author.getIdValue().isEmpty() && author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - - if (author.getIdType().equals("ORCID")) { - creatorsElement.append("" + author.getIdValue() + ""); - } - if (author.getIdType().equals("ISNI")) { - creatorsElement.append("" + author.getIdValue() + ""); - } - if (author.getIdType().equals("LCNA")) { - creatorsElement.append("" + author.getIdValue() + ""); - } - } - if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - creatorsElement.append("" + author.getAffiliation().getDisplayValue() + ""); - } - creatorsElement.append(""); - } - - } else { - creatorsElement.append("").append(AbstractGlobalIdServiceBean.UNAVAILABLE).append(""); - } - - xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString()); - - StringBuilder contributorsElement = new StringBuilder(); - if (this.getContacts() != null) { - for (String[] contact : this.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("" + contact[0] + ""); - if (!contact[1].isEmpty()) { - contributorsElement.append("" + contact[1] + ""); - } - contributorsElement.append(""); - } - } - } - - if (this.getProducers() != null) { - for (String[] producer : this.getProducers()) { - 
contributorsElement.append("" + producer[0] + ""); - if (!producer[1].isEmpty()) { - contributorsElement.append("" + producer[1] + ""); - } - contributorsElement.append(""); - } - } - - String relIdentifiers = generateRelatedIdentifiers(dvObject); - - xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); - - xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); - return xmlMetadata; - } - - private String generateRelatedIdentifiers(DvObject dvObject) { - - StringBuilder sb = new StringBuilder(); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - if (dataFile.getGlobalId() != null) { - if (sb.toString().isEmpty()) { - sb.append(""); - } - sb.append("" + dataFile.getGlobalId() + ""); - } - } - - if (!sb.toString().isEmpty()) { - sb.append(""); - } - } - } else if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - sb.append(""); - sb.append("" + df.getOwner().getGlobalId() + ""); - sb.append(""); - } - return sb.toString(); - } - - public void generateFileIdentifiers(DvObject dvObject) { - - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - datafileIdentifiers.add(dataFile.getIdentifier()); - int x = xmlMetadata.indexOf("") - 1; - xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier()); - xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" + template.substring(x, template.length() - 1); - - } - - } else { - xmlMetadata = xmlMetadata.replace("${relatedIdentifier}", ""); - } - } - } - - public static String getTemplate() { - return template; - } - - public static void setTemplate(String template) { - DataCiteMetadataTemplate.template = template; - } - - public String getIdentifier() { - return identifier; - } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } - - public void setDatasetIdentifier(String datasetIdentifier) { - this.datasetIdentifier = datasetIdentifier; - } - - public List getCreators() { - return creators; - } - - public void setCreators(List creators) { - this.creators = creators; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getPublisher() { - return publisher; - } - - public void setPublisher(String publisher) { - this.publisher = publisher; - } - - public String getPublisherYear() { - return publisherYear; - } - - public void setPublisherYear(String publisherYear) { - this.publisherYear = publisherYear; - } -} - -class Util { - - public static void close(InputStream in) { - if (in != null) { - try { - in.close(); - } catch (IOException e) { - throw new RuntimeException("Fail to close InputStream"); - } - } - } - - public static String readAndClose(InputStream inStream, String encoding) { - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - byte[] buf = new byte[128]; - String data; - try { - int cnt; - while ((cnt = inStream.read(buf)) >= 0) { - outStream.write(buf, 0, cnt); - } - data = outStream.toString(encoding); - } catch (IOException ioe) { - throw new 
RuntimeException("IOException"); - } finally { - close(inStream); - } - return data; - } - - public static List getListFromStr(String str) { - return Arrays.asList(str.split("; ")); -// List authors = new ArrayList(); -// int preIdx = 0; -// for(int i=0;i authors) { - StringBuilder str = new StringBuilder(); - for (String author : authors) { - if (str.length() > 0) { - str.append("; "); - } - str.append(author); - } - return str.toString(); - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java deleted file mode 100644 index 48786b41824..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java +++ /dev/null @@ -1,248 +0,0 @@ -package edu.harvard.iq.dataverse; - -import java.io.IOException; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.Base64; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.logging.Level; -import java.util.logging.Logger; - -import jakarta.ejb.EJB; -import jakarta.ejb.Stateless; - -import edu.harvard.iq.dataverse.settings.JvmSettings; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.HttpStatus; - - -/** - * - * @author luopc - */ -@Stateless -public class DOIDataCiteServiceBean extends DOIServiceBean { - - private static final Logger logger = Logger.getLogger(DOIDataCiteServiceBean.class.getCanonicalName()); - - private static final String PUBLIC = "public"; - private static final String FINDABLE = "findable"; - private static final String RESERVED = "reserved"; - private static final String DRAFT = "draft"; - - @EJB - DOIDataCiteRegisterService doiDataCiteRegisterService; - - @Override - public boolean registerWhenPublished() { - return false; - } - - - - @Override - public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) { - logger.log(Level.FINE,"alreadyRegistered"); - if(pid==null || pid.asString().isEmpty()) { - logger.fine("No identifier sent."); - return false; - } - boolean alreadyRegistered; - String identifier = pid.asString(); - try{ - alreadyRegistered = doiDataCiteRegisterService.testDOIExists(identifier); - } catch (Exception e){ - logger.log(Level.WARNING, "alreadyRegistered failed"); - return false; - } - return alreadyRegistered; - } - - @Override - public String createIdentifier(DvObject dvObject) throws Exception { - logger.log(Level.FINE,"createIdentifier"); - if(dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty() ){ - dvObject = generateIdentifier(dvObject); - } - String identifier = getIdentifier(dvObject); - Map metadata = getMetadataForCreateIndicator(dvObject); - metadata.put("_status", "reserved"); - try { - String retString = doiDataCiteRegisterService.reserveIdentifier(identifier, metadata, dvObject); - logger.log(Level.FINE, "create DOI identifier retString : " + retString); - return retString; - } catch (Exception e) { - logger.log(Level.WARNING, "Identifier not created: create failed", e); - throw e; - } - } - - @Override - public Map getIdentifierMetadata(DvObject dvObject) { - logger.log(Level.FINE,"getIdentifierMetadata"); - String identifier = getIdentifier(dvObject); - Map metadata = new HashMap<>(); - try { - metadata = doiDataCiteRegisterService.getMetadata(identifier); - } catch (Exception e) { - logger.log(Level.WARNING, "getIdentifierMetadata failed", e); - } - return metadata; - } - - - /** - * Modifies the DOI metadata for a Dataset - * @param dvObject 
the dvObject whose metadata needs to be modified - * @return the Dataset identifier, or null if the modification failed - * @throws java.lang.Exception - */ - @Override - public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { - logger.log(Level.FINE,"modifyIdentifier"); - String identifier = getIdentifier(dvObject); - try { - HashMap metadata = doiDataCiteRegisterService.getMetadata(identifier); - doiDataCiteRegisterService.modifyIdentifier(identifier, metadata, dvObject); - } catch (Exception e) { - logger.log(Level.WARNING, "modifyMetadata failed", e); - throw e; - } - return identifier; - } - - public void deleteRecordFromCache(Dataset datasetIn){ - logger.log(Level.FINE,"deleteRecordFromCache"); - String identifier = getIdentifier(datasetIn); - HashMap doiMetadata = new HashMap(); - try { - doiMetadata = doiDataCiteRegisterService.getMetadata(identifier); - } catch (Exception e) { - logger.log(Level.WARNING, "get matadata failed cannot delete"); - logger.log(Level.WARNING, "String {0}", e.toString()); - logger.log(Level.WARNING, "localized message {0}", e.getLocalizedMessage()); - logger.log(Level.WARNING, "cause", e.getCause()); - logger.log(Level.WARNING, "message {0}", e.getMessage()); - } - - String idStatus = (String) doiMetadata.get("_status"); - - if (idStatus == null || idStatus.equals("reserved")) { - logger.log(Level.WARNING, "Delete status is reserved.."); - try { - doiDataCiteRegisterService.deleteIdentifier(identifier); - } catch (Exception e) { - logger.log(Level.WARNING, "delete failed"); - logger.log(Level.WARNING, "String {0}", e.toString()); - logger.log(Level.WARNING, "localized message {0}", e.getLocalizedMessage()); - logger.log(Level.WARNING, "cause", e.getCause()); - logger.log(Level.WARNING, "message {0}", e.getMessage()); - throw new RuntimeException(e); - } - } - } - - /* - * Deletes a DOI if it is in DRAFT/RESERVED state or removes metadata and changes it from PUBLIC/FINDABLE to REGISTERED. - */ - @Override - public void deleteIdentifier(DvObject dvObject) throws IOException, HttpException { - logger.log(Level.FINE,"deleteIdentifier"); - String identifier = getIdentifier(dvObject); - //ToDo - PidUtils currently has a DataCite API call that would get the status at DataCite for this identifier - that could be more accurate than assuming based on whether the dvObject has been published - String idStatus = DRAFT; - if(dvObject.isReleased()) { - idStatus = PUBLIC; - } - if ( idStatus != null ) { - switch ( idStatus ) { - case RESERVED: - case DRAFT: - logger.log(Level.INFO, "Delete status is reserved.."); - //service only removes the identifier from the cache (since it was written before DOIs could be registered in draft state) - doiDataCiteRegisterService.deleteIdentifier(identifier); - //So we call the deleteDraftIdentifier method below until things are refactored - deleteDraftIdentifier(dvObject); - break; - - case PUBLIC: - case FINDABLE: - //if public then it has been released set to unavailable and reset target to n2t url - Map metadata = addDOIMetadataForDestroyedDataset(dvObject); - metadata.put("_status", "registered"); - metadata.put("_target", getTargetUrl(dvObject)); - doiDataCiteRegisterService.deactivateIdentifier(identifier, metadata, dvObject); - break; - } - } - } - - /** - * Deletes DOI from the DataCite side, if possible. Only "draft" DOIs can be - * deleted. 
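Editor's note: for clarity, the request issued by deleteDraftIdentifier below is the DataCite REST API call for deleting a draft DOI. The equivalent expressed with java.net.http.HttpClient is sketched here; it is an illustration only, not the code in this patch, and the base URL, DOI and credentials are placeholders.

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;
    import java.util.Base64;

    public class DeleteDraftDoiExample {
        public static void main(String[] args) throws Exception {
            String baseUrl = "https://api.test.datacite.org";   // placeholder
            String doi = "10.5072/FK2/EXAMPLE";                 // placeholder
            String basicAuth = Base64.getEncoder().encodeToString("user:password".getBytes());
            HttpRequest request = HttpRequest.newBuilder(URI.create(baseUrl + "/dois/" + doi))
                    .DELETE()
                    .header("Authorization", "Basic " + basicAuth)
                    .build();
            HttpResponse<Void> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.discarding());
            // 204: draft deleted; 405: DOI is findable and cannot be deleted; 404: unknown DOI
            System.out.println(response.statusCode());
        }
    }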
- */ - private void deleteDraftIdentifier(DvObject dvObject) throws IOException { - - //ToDo - incorporate into DataCiteRESTfulClient - String baseUrl = JvmSettings.DATACITE_REST_API_URL.lookup(); - String username = JvmSettings.DATACITE_USERNAME.lookup(); - String password = JvmSettings.DATACITE_PASSWORD.lookup(); - GlobalId doi = dvObject.getGlobalId(); - /** - * Deletes the DOI from DataCite if it can. Returns 204 if PID was deleted - * (only possible for "draft" DOIs), 405 (method not allowed) if the DOI - * wasn't deleted (because it's in "findable" state, for example, 404 if the - * DOI wasn't found, and possibly other status codes such as 500 if DataCite - * is down. - */ - - URL url = new URL(baseUrl + "/dois/" + doi.getAuthority() + "/" + doi.getIdentifier()); - HttpURLConnection connection = null; - connection = (HttpURLConnection) url.openConnection(); - connection.setRequestMethod("DELETE"); - String userpass = username + ":" + password; - String basicAuth = "Basic " + new String(Base64.getEncoder().encode(userpass.getBytes())); - connection.setRequestProperty("Authorization", basicAuth); - int status = connection.getResponseCode(); - if(status!=HttpStatus.SC_NO_CONTENT) { - logger.warning("Incorrect Response Status from DataCite: " + status + " : " + connection.getResponseMessage()); - throw new HttpException("Status: " + status); - } - logger.fine("deleteDoi status for " + doi.asString() + ": " + status); - } - - @Override - public boolean publicizeIdentifier(DvObject dvObject) { - logger.log(Level.FINE,"updateIdentifierStatus"); - if(dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty() ){ - dvObject = generateIdentifier(dvObject); - } - String identifier = getIdentifier(dvObject); - Map metadata = getUpdateMetadata(dvObject); - metadata.put("_status", PUBLIC); - metadata.put("datacite.publicationyear", generateYear(dvObject)); - metadata.put("_target", getTargetUrl(dvObject)); - try { - doiDataCiteRegisterService.registerIdentifier(identifier, metadata, dvObject); - return true; - } catch (Exception e) { - logger.log(Level.WARNING, "modifyMetadata failed: " + e.getMessage(), e); - return false; - } - } - - - @Override - public List getProviderInformation(){ - return List.of("DataCite", "https://status.datacite.org"); - } - - - - @Override - protected String getProviderKeyName() { - return "DataCite"; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java deleted file mode 100644 index 0182c745cd0..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java +++ /dev/null @@ -1,78 +0,0 @@ -package edu.harvard.iq.dataverse; - -import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; - -public abstract class DOIServiceBean extends AbstractGlobalIdServiceBean { - - public static final String DOI_PROTOCOL = "doi"; - public static final String DOI_RESOLVER_URL = "https://doi.org/"; - public static final String HTTP_DOI_RESOLVER_URL = "http://doi.org/"; - public static final String DXDOI_RESOLVER_URL = "https://dx.doi.org/"; - public static final String HTTP_DXDOI_RESOLVER_URL = "http://dx.doi.org/"; - - public DOIServiceBean() { - super(); - } - - @Override - public GlobalId parsePersistentId(String pidString) { - if (pidString.startsWith(DOI_RESOLVER_URL)) { - pidString = pidString.replace(DOI_RESOLVER_URL, - (DOI_PROTOCOL + ":")); - } else if (pidString.startsWith(HTTP_DOI_RESOLVER_URL)) { - pidString = pidString.replace(HTTP_DOI_RESOLVER_URL, - 
(DOI_PROTOCOL + ":")); - } else if (pidString.startsWith(DXDOI_RESOLVER_URL)) { - pidString = pidString.replace(DXDOI_RESOLVER_URL, - (DOI_PROTOCOL + ":")); - } - return super.parsePersistentId(pidString); - } - - @Override - public GlobalId parsePersistentId(String protocol, String identifierString) { - - if (!DOI_PROTOCOL.equals(protocol)) { - return null; - } - GlobalId globalId = super.parsePersistentId(protocol, identifierString); - if (globalId!=null && !GlobalIdServiceBean.checkDOIAuthority(globalId.getAuthority())) { - return null; - } - return globalId; - } - - @Override - public GlobalId parsePersistentId(String protocol, String authority, String identifier) { - - if (!DOI_PROTOCOL.equals(protocol)) { - return null; - } - return super.parsePersistentId(protocol, authority, identifier); - } - - public String getUrlPrefix() { - return DOI_RESOLVER_URL; - } - - @Override - public boolean isConfigured() { - if (configured == null) { - if (getProviderKeyName() == null) { - configured = false; - } else { - String doiProvider = settingsService.getValueForKey(Key.DoiProvider, ""); - if (getProviderKeyName().equals(doiProvider)) { - configured = true; - } else if (!doiProvider.isEmpty()) { - configured = false; - } - } - } - return super.isConfigured(); - } - - protected String getProviderKeyName() { - return null; - } -} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index 9b4b89db44f..a012175deae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; import java.io.BufferedWriter; import java.io.ByteArrayOutputStream; @@ -635,12 +636,12 @@ public Map getDataCiteMetadata() { String authorString = getAuthorsString(); if (authorString.isEmpty()) { - authorString = AbstractGlobalIdServiceBean.UNAVAILABLE; + authorString = AbstractPidProvider.UNAVAILABLE; } String producerString = getPublisher(); if (producerString.isEmpty()) { - producerString = AbstractGlobalIdServiceBean.UNAVAILABLE; + producerString = AbstractPidProvider.UNAVAILABLE; } metadata.put("datacite.creator", authorString); diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 0f83ae3c5c8..53cdff31cc2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -18,7 +18,6 @@ import edu.harvard.iq.dataverse.util.ShapefileHandler; import edu.harvard.iq.dataverse.util.StringUtil; import java.io.IOException; -import java.util.Date; import java.util.List; import java.util.ArrayList; import java.util.Objects; @@ -33,7 +32,7 @@ import jakarta.json.JsonArrayBuilder; import jakarta.persistence.*; import jakarta.validation.constraints.Pattern; -import org.hibernate.validator.constraints.NotBlank; +import jakarta.validation.constraints.NotBlank; /** * @@ -53,9 +52,9 @@ }) @Entity @Table(indexes = {@Index(columnList="ingeststatus") - , @Index(columnList="checksumvalue") - , @Index(columnList="contenttype") - , @Index(columnList="restricted")}) + , @Index(columnList="checksumvalue") + , @Index(columnList="contenttype") + , @Index(columnList="restricted")}) public class DataFile extends DvObject implements Comparable { 
private static final Logger logger = Logger.getLogger(DatasetPage.class.getCanonicalName()); private static final long serialVersionUID = 1L; @@ -73,10 +72,6 @@ public class DataFile extends DvObject implements Comparable { @Pattern(regexp = "^.*/.*$", message = "{contenttype.slash}") private String contentType; - public void setFileAccessRequests(List fileAccessRequests) { - this.fileAccessRequests = fileAccessRequests; - } - // @Expose // @SerializedName("storageIdentifier") // @Column( nullable = false ) @@ -200,6 +195,28 @@ public String toString() { @OneToMany(mappedBy="dataFile", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) private List guestbookResponses; + @OneToMany(mappedBy="dataFile",fetch = FetchType.LAZY,cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST, CascadeType.REFRESH}) + private List fileAccessRequests; + + @ManyToMany + @JoinTable(name = "fileaccessrequests", + joinColumns = @JoinColumn(name = "datafile_id"), + inverseJoinColumns = @JoinColumn(name = "authenticated_user_id")) + private List fileAccessRequesters; + + + public List getFileAccessRequests(){ + return fileAccessRequests; + } + + public List getFileAccessRequests(FileAccessRequest.RequestState state){ + return fileAccessRequests.stream().filter(far -> far.getState() == state).collect(Collectors.toList()); + } + + public void setFileAccessRequests(List fARs){ + this.fileAccessRequests = fARs; + } + public List getGuestbookResponses() { return guestbookResponses; } @@ -369,7 +386,17 @@ public JsonArrayBuilder getTagLabelsAsJsonArrayBuilder(){ public void setTags(List dataFileTags) { this.dataFileTags = dataFileTags; } - + + public void addUniqueTagByLabel(String tagLabel) throws IllegalArgumentException { + if (tagExists(tagLabel)) { + return; + } + DataFileTag tag = new DataFileTag(); + tag.setTypeByLabel(tagLabel); + tag.setDataFile(this); + addTag(tag); + } + public void addTag(DataFileTag tag) { if (dataFileTags == null) { dataFileTags = new ArrayList<>(); @@ -518,61 +545,61 @@ public void setDescription(String description) { fmd.setDescription(description); } } + + public FileMetadata getDraftFileMetadata() { + FileMetadata latestFileMetadata = getLatestFileMetadata(); + if (latestFileMetadata.getDatasetVersion().isDraft()) { + return latestFileMetadata; + } + return null; + } public FileMetadata getFileMetadata() { return getLatestFileMetadata(); } - + public FileMetadata getLatestFileMetadata() { - FileMetadata fmd = null; + FileMetadata resultFileMetadata = null; - // for newly added or harvested, just return the one fmd if (fileMetadatas.size() == 1) { return fileMetadatas.get(0); } - + for (FileMetadata fileMetadata : fileMetadatas) { - // if it finds a draft, return it if (fileMetadata.getDatasetVersion().getVersionState().equals(VersionState.DRAFT)) { return fileMetadata; - } - - // otherwise return the one with the latest version number - // duplicate logic in getLatestPublishedFileMetadata() - if (fmd == null || fileMetadata.getDatasetVersion().getVersionNumber().compareTo( fmd.getDatasetVersion().getVersionNumber() ) > 0 ) { - fmd = fileMetadata; - } else if ((fileMetadata.getDatasetVersion().getVersionNumber().compareTo( fmd.getDatasetVersion().getVersionNumber())==0 )&& - ( fileMetadata.getDatasetVersion().getMinorVersionNumber().compareTo( fmd.getDatasetVersion().getMinorVersionNumber()) > 0 ) ) { - fmd = fileMetadata; } + resultFileMetadata = getTheNewerFileMetadata(resultFileMetadata, fileMetadata); } - return fmd; + + return resultFileMetadata; } 
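Editor's note: the refactoring above extracts getTheNewerFileMetadata so that the same "keep the newer of two versions" comparison can be used both in the loop above and as a reduction operator, which is how the published-metadata lookup in the next hunk uses it. A minimal sketch, assuming a DataFile's list of FileMetadata objects; the helper name is hypothetical.

    // Hypothetical helper: pick the newest FileMetadata, or null for an empty list.
    static FileMetadata newestOf(java.util.List<FileMetadata> fileMetadatas) {
        return fileMetadatas.stream().reduce(null, DataFile::getTheNewerFileMetadata);
    }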
- -// //Returns null if no published version + public FileMetadata getLatestPublishedFileMetadata() throws UnsupportedOperationException { - FileMetadata fmd = null; - - for (FileMetadata fileMetadata : fileMetadatas) { - // if it finds a draft, skip - if (fileMetadata.getDatasetVersion().getVersionState().equals(VersionState.DRAFT)) { - continue; - } - - // otherwise return the one with the latest version number - // duplicate logic in getLatestFileMetadata() - if (fmd == null || fileMetadata.getDatasetVersion().getVersionNumber().compareTo( fmd.getDatasetVersion().getVersionNumber() ) > 0 ) { - fmd = fileMetadata; - } else if ((fileMetadata.getDatasetVersion().getVersionNumber().compareTo( fmd.getDatasetVersion().getVersionNumber())==0 )&& - ( fileMetadata.getDatasetVersion().getMinorVersionNumber().compareTo( fmd.getDatasetVersion().getMinorVersionNumber()) > 0 ) ) { - fmd = fileMetadata; - } - } - if(fmd == null) { + FileMetadata resultFileMetadata = fileMetadatas.stream() + .filter(metadata -> !metadata.getDatasetVersion().getVersionState().equals(VersionState.DRAFT)) + .reduce(null, DataFile::getTheNewerFileMetadata); + + if (resultFileMetadata == null) { throw new UnsupportedOperationException("No published metadata version for DataFile " + this.getId()); } - return fmd; + return resultFileMetadata; + } + + public static FileMetadata getTheNewerFileMetadata(FileMetadata current, FileMetadata candidate) { + if (current == null) { + return candidate; + } + + DatasetVersion currentVersion = current.getDatasetVersion(); + DatasetVersion candidateVersion = candidate.getDatasetVersion(); + + if (DatasetVersion.compareByVersion.compare(candidateVersion, currentVersion) > 0) { + return candidate; + } + + return current; } /** @@ -583,7 +610,7 @@ public long getFilesize() { if (this.filesize == null) { // -1 means "unknown" return -1; - } + } return this.filesize; } @@ -613,7 +640,7 @@ public String getFriendlySize() { return BundleUtil.getStringFromBundle("file.sizeNotAvailable"); } } - + public boolean isRestricted() { return restricted; } @@ -750,50 +777,38 @@ public String getUnf() { return null; } - @OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}, orphanRemoval = true) - private List fileAccessRequests; + public List getFileAccessRequesters() { + return fileAccessRequesters; + } - public List getFileAccessRequests() { - return fileAccessRequests; + public void setFileAccessRequesters(List fileAccessRequesters) { + this.fileAccessRequesters = fileAccessRequesters; } - public void addFileAccessRequester(AuthenticatedUser authenticatedUser) { + + public void addFileAccessRequest(FileAccessRequest request) { if (this.fileAccessRequests == null) { this.fileAccessRequests = new ArrayList<>(); } - Set existingUsers = this.fileAccessRequests.stream() - .map(FileAccessRequest::getAuthenticatedUser) - .collect(Collectors.toSet()); + this.fileAccessRequests.add(request); + } - if (existingUsers.contains(authenticatedUser)) { - return; + public FileAccessRequest getAccessRequestForAssignee(RoleAssignee roleAssignee) { + if (this.fileAccessRequests == null) { + return null; } - FileAccessRequest request = new FileAccessRequest(); - request.setCreationTime(new Date()); - request.setDataFile(this); - request.setAuthenticatedUser(authenticatedUser); - - FileAccessRequest.FileAccessRequestKey key = new FileAccessRequest.FileAccessRequestKey(); - key.setAuthenticatedUser(authenticatedUser.getId()); - key.setDataFile(this.getId()); - - 
request.setId(key); - - this.fileAccessRequests.add(request); + return this.fileAccessRequests.stream() + .filter(fileAccessRequest -> fileAccessRequest.getRequester().equals(roleAssignee) && fileAccessRequest.isStateCreated()).findFirst() + .orElse(null); } - public boolean removeFileAccessRequester(RoleAssignee roleAssignee) { + public boolean removeFileAccessRequest(FileAccessRequest request) { if (this.fileAccessRequests == null) { return false; } - FileAccessRequest request = this.fileAccessRequests.stream() - .filter(fileAccessRequest -> fileAccessRequest.getAuthenticatedUser().equals(roleAssignee)) - .findFirst() - .orElse(null); - if (request != null) { this.fileAccessRequests.remove(request); return true; @@ -802,13 +817,13 @@ public boolean removeFileAccessRequester(RoleAssignee roleAssignee) { return false; } - public boolean containsFileAccessRequestFromUser(RoleAssignee roleAssignee) { + public boolean containsActiveFileAccessRequestFromUser(RoleAssignee roleAssignee) { if (this.fileAccessRequests == null) { return false; } - Set existingUsers = this.fileAccessRequests.stream() - .map(FileAccessRequest::getAuthenticatedUser) + Set existingUsers = getFileAccessRequests(FileAccessRequest.RequestState.CREATED).stream() + .map(FileAccessRequest::getRequester) .collect(Collectors.toSet()); return existingUsers.contains(roleAssignee); @@ -975,8 +990,6 @@ public String toJSON(){ public JsonObject asGsonObject(boolean prettyPrint){ - String overarchingKey = "data"; - GsonBuilder builder; if (prettyPrint){ // Add pretty printing builder = new GsonBuilder().excludeFieldsWithoutExposeAnnotation().setPrettyPrinting(); @@ -1090,8 +1103,12 @@ public String getTargetUrl() { return DataFile.TARGET_URL; } + private boolean tagExists(String tagLabel) { + for (DataFileTag dataFileTag : dataFileTags) { + if (dataFileTag.getTypeLabel().equals(tagLabel)) { + return true; + } + } + return false; + } } // end of class - - - - diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 98ee3351458..8ceb529a5d4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -7,17 +8,24 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.search.SolrSearchResult; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import edu.harvard.iq.dataverse.util.FileSortFieldAndOrder; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; import java.util.logging.Level; import java.util.logging.Logger; @@ -36,8 +44,6 @@ * * @author Leonid 
Andreev * - * Basic skeleton of the new DataFile service for DVN 4.0 - * */ @Stateless @@ -59,6 +65,11 @@ public class DataFileServiceBean implements java.io.Serializable { @EJB EmbargoServiceBean embargoService; + @EJB SystemConfig systemConfig; + + @EJB + StorageUseServiceBean storageUseService; + @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -146,6 +157,27 @@ public DataFile find(Object pk) { }*/ + public List findAll(List fileIds){ + List dataFiles = new ArrayList<>(); + + for (Long fileId : fileIds){ + dataFiles.add(find(fileId)); + } + + return dataFiles; + } + + public List findAll(String fileIdsAsString){ + ArrayList dataFileIds = new ArrayList<>(); + + String[] fileIds = fileIdsAsString.split(","); + for (String fId : fileIds){ + dataFileIds.add(Long.parseLong(fId)); + } + + return findAll(dataFileIds); + } + public DataFile findByGlobalId(String globalId) { return (DataFile) dvObjectService.findByGlobalId(globalId, DvObject.DType.DataFile); } @@ -354,6 +386,18 @@ public FileMetadata findMostRecentVersionFileIsIn(DataFile file) { return fileMetadatas.get(0); } } + + public List findAllCheapAndEasy(String fileIdsAsString){ + //assumption is that the fileIds are separated by ',' + ArrayList dataFilesFound = new ArrayList<>(); + String[] fileIds = fileIdsAsString.split(","); + DataFile df = this.findCheapAndEasy(Long.parseLong(fileIds[0])); + if(df != null){ + dataFilesFound.add(df); + } + + return dataFilesFound; + } public DataFile findCheapAndEasy(Long id) { DataFile dataFile; @@ -566,6 +610,125 @@ public DataFile findCheapAndEasy(Long id) { return dataFile; } + private List retrieveFileAccessRequesters(DataFile fileIn) { + List retList = new ArrayList<>(); + + // List requesters = em.createNativeQuery("select authenticated_user_id + // from fileaccessrequests where datafile_id = + // "+fileIn.getId()).getResultList(); + TypedQuery typedQuery = em.createQuery("select f.user.id from FileAccessRequest f where f.dataFile.id = :file_id and f.requestState= :requestState", Long.class); + typedQuery.setParameter("file_id", fileIn.getId()); + typedQuery.setParameter("requestState", FileAccessRequest.RequestState.CREATED); + List requesters = typedQuery.getResultList(); + for (Long userId : requesters) { + AuthenticatedUser user = userService.find(userId); + if (user != null) { + retList.add(user); + } + } + + return retList; + } + + private List retrieveFileMetadataForVersion(Dataset dataset, DatasetVersion version, List dataFiles, Map filesMap, Map categoryMap) { + List retList = new ArrayList<>(); + Map> categoryMetaMap = new HashMap<>(); + + List categoryResults = em.createNativeQuery("select t0.filecategories_id, t0.filemetadatas_id from filemetadata_datafilecategory t0, filemetadata t1 where (t0.filemetadatas_id = t1.id) AND (t1.datasetversion_id = "+version.getId()+")").getResultList(); + int i = 0; + for (Object[] result : categoryResults) { + Long category_id = (Long) result[0]; + Long filemeta_id = (Long) result[1]; + if (categoryMetaMap.get(filemeta_id) == null) { + categoryMetaMap.put(filemeta_id, new HashSet<>()); + } + categoryMetaMap.get(filemeta_id).add(category_id); + i++; + } + logger.fine("Retrieved and mapped "+i+" file categories attached to files in the version "+version.getId()); + + List metadataResults = em.createNativeQuery("select id, datafile_id, DESCRIPTION, LABEL, RESTRICTED, DIRECTORYLABEL, prov_freeform from FileMetadata where datasetversion_id = "+version.getId() + " ORDER BY LABEL").getResultList(); + + for (Object[] result : 
metadataResults) { + Integer filemeta_id = (Integer) result[0]; + + if (filemeta_id == null) { + continue; + } + + Long file_id = (Long) result[1]; + if (file_id == null) { + continue; + } + + Integer file_list_id = filesMap.get(file_id); + if (file_list_id == null) { + continue; + } + FileMetadata fileMetadata = new FileMetadata(); + fileMetadata.setId(filemeta_id.longValue()); + fileMetadata.setCategories(new LinkedList<>()); + + if (categoryMetaMap.get(fileMetadata.getId()) != null) { + for (Long cat_id : categoryMetaMap.get(fileMetadata.getId())) { + if (categoryMap.get(cat_id) != null) { + fileMetadata.getCategories().add(dataset.getCategories().get(categoryMap.get(cat_id))); + } + } + } + + fileMetadata.setDatasetVersion(version); + + // Link the FileMetadata object to the DataFile: + fileMetadata.setDataFile(dataFiles.get(file_list_id)); + // ... and the DataFile back to the FileMetadata: + fileMetadata.getDataFile().getFileMetadatas().add(fileMetadata); + + String description = (String) result[2]; + + if (description != null) { + fileMetadata.setDescription(description); + } + + String label = (String) result[3]; + + if (label != null) { + fileMetadata.setLabel(label); + } + + Boolean restricted = (Boolean) result[4]; + if (restricted != null) { + fileMetadata.setRestricted(restricted); + } + + String dirLabel = (String) result[5]; + if (dirLabel != null){ + fileMetadata.setDirectoryLabel(dirLabel); + } + + String provFreeForm = (String) result[6]; + if (provFreeForm != null){ + fileMetadata.setProvFreeForm(provFreeForm); + } + + retList.add(fileMetadata); + } + + logger.fine("Retrieved "+retList.size()+" file metadatas for version "+version.getId()+" (inside the retrieveFileMetadataForVersion method)."); + + + /* + We no longer perform this sort here, just to keep this filemetadata + list as identical as possible to when it's produced by the "traditional" + EJB method. When it's necessary to have the filemetadatas sorted by + FileMetadata.compareByLabel, the DatasetVersion.getFileMetadatasSorted() + method should be called. + + Collections.sort(retList, FileMetadata.compareByLabel); */ + + return retList; + } + public List findIngestsInProgress() { if ( em.isOpen() ) { String qr = "select object(o) from DataFile as o where o.ingestStatus =:scheduledStatusCode or o.ingestStatus =:progressStatusCode order by o.id"; @@ -773,7 +936,7 @@ public boolean isThumbnailAvailable (DataFile file) { } // If thumbnails are not even supported for this class of files, - // there's notthing to talk about: + // there's nothing to talk about: if (!FileUtil.isThumbnailSupported(file)) { return false; } @@ -788,16 +951,16 @@ public boolean isThumbnailAvailable (DataFile file) { is more important... */ - - if (ImageThumbConverter.isThumbnailAvailable(file)) { - file = this.find(file.getId()); - file.setPreviewImageAvailable(true); - this.save(file); - return true; - } - - return false; + file = this.find(file.getId()); + if (ImageThumbConverter.isThumbnailAvailable(file)) { + file.setPreviewImageAvailable(true); + this.save(file); + return true; + } + file.setPreviewImageFail(true); + this.save(file); + return false; } @@ -1079,9 +1242,8 @@ public List selectFilesWithMissingOriginalSizes() { * Check that a identifier entered by the user is unique (not currently used * for any other study in this Dataverse Network). 
Also check for duplicate * in the remote PID service if needed - * @param userIdentifier - * @param datafile - * @param idServiceBean + * @param datafileId + * @param storageLocation * @return {@code true} iff the global identifier is unique. */ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws IOException { @@ -1203,4 +1365,39 @@ public Embargo findEmbargo(Long id) { DataFile d = find(id); return d.getEmbargo(); } + + /** + * Checks if the supplied DvObjectContainer (Dataset or Collection; although + * only collection-level storage quotas are officially supported as of now) + * has a quota configured, and if not, keeps checking if any of the direct + * ancestor Collections further up have a configured quota. If it finds one, + * it will retrieve the current total content size for that specific ancestor + * dvObjectContainer and use it to define the quota limit for the upload + * session in progress. + * + * @param parent - DvObjectContainer, Dataset or Collection + * @return upload session size limit spec, or null if quota not defined on + * any of the ancestor DvObjectContainers + */ + public UploadSessionQuotaLimit getUploadSessionQuotaLimit(DvObjectContainer parent) { + DvObjectContainer testDvContainer = parent; + StorageQuota quota = testDvContainer.getStorageQuota(); + while (quota == null && testDvContainer.getOwner() != null) { + testDvContainer = testDvContainer.getOwner(); + quota = testDvContainer.getStorageQuota(); + if (quota != null) { + break; + } + } + if (quota == null || quota.getAllocation() == null) { + return null; + } + + // Note that we are checking the recorded storage use not on the + // immediate parent necessarily, but on the specific ancestor + // DvObjectContainer on which the storage quota is defined: + Long currentSize = storageUseService.findStorageSizeByDvContainerId(testDvContainer.getId()); + + return new UploadSessionQuotaLimit(quota.getAllocation(), currentSize); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java b/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java index f4f66d3c874..351c4032939 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java @@ -58,7 +58,7 @@ public enum TagType {Survey, TimeSeries, Panel, Event, Genomics, Network, Geospa private static final Map TagTypeToLabels = new HashMap<>(); - private static final Map TagLabelToTypes = new HashMap<>(); + public static final Map TagLabelToTypes = new HashMap<>(); static { diff --git a/src/main/java/edu/harvard/iq/dataverse/DataTable.java b/src/main/java/edu/harvard/iq/dataverse/DataTable.java index a17d8c65138..95f3aed0f40 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataTable.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataTable.java @@ -112,6 +112,16 @@ public DataTable() { @Column( nullable = true ) private String originalFileName; + + /** + * The physical tab-delimited file is in storage with the list of variable + * names saved as the 1st line. This means that we do not need to generate + * this line on the fly. (Also means that direct download mechanism can be + * used for this file!) 
+ */ + @Column(nullable = false) + private boolean storedWithVariableHeader = false; + /* * Getter and Setter methods: */ @@ -206,6 +216,14 @@ public void setOriginalFileName(String originalFileName) { this.originalFileName = originalFileName; } + public boolean isStoredWithVariableHeader() { + return storedWithVariableHeader; + } + + public void setStoredWithVariableHeader(boolean storedWithVariableHeader) { + this.storedWithVariableHeader = storedWithVariableHeader; + } + /* * Custom overrides for hashCode(), equals() and toString() methods: */ diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 620e66c6c54..bafec4ed7ba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -35,6 +35,7 @@ import jakarta.persistence.TemporalType; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.storageuse.StorageUse; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -158,6 +159,23 @@ public void setCitationDateDatasetFieldType(DatasetFieldType citationDateDataset this.citationDateDatasetFieldType = citationDateDatasetFieldType; } + // Per DataCite best practices, the citation date of a dataset may need + // to be adjusted to reflect the latest embargo availability date of any + // file within the first published version. + // If any files are embargoed in the first version, this date will be + // calculated and cached here upon its publication, in the + // FinalizeDatasetPublicationCommand. + private Timestamp embargoCitationDate; + + public Timestamp getEmbargoCitationDate() { + return embargoCitationDate; + } + + public void setEmbargoCitationDate(Timestamp embargoCitationDate) { + this.embargoCitationDate = embargoCitationDate; + } + + @ManyToOne @JoinColumn(name="template_id",nullable = true) @@ -172,6 +190,10 @@ public void setTemplate(Template template) { } public Dataset() { + this(false); + } + + public Dataset(boolean isHarvested) { DatasetVersion datasetVersion = new DatasetVersion(); datasetVersion.setDataset(this); datasetVersion.setVersionState(DatasetVersion.VersionState.DRAFT); @@ -179,6 +201,11 @@ public Dataset() { datasetVersion.setVersionNumber((long) 1); datasetVersion.setMinorVersionNumber((long) 0); versions.add(datasetVersion); + + if (!isHarvested) { + StorageUse storageUse = new StorageUse(this); + this.setStorageUse(storageUse); + } } /** @@ -290,6 +317,7 @@ public boolean isDeaccessioned() { } return hasDeaccessionedVersions; // since any published version would have already returned } + public DatasetVersion getLatestVersion() { return getVersions().get(0); @@ -676,20 +704,10 @@ public Timestamp getCitationDate() { Timestamp citationDate = null; //Only calculate if this dataset doesn't use an alternate date field for publication date if (citationDateDatasetFieldType == null) { - List versions = this.versions; - // TODo - is this ever not version 1.0 (or draft if not published yet) - DatasetVersion oldest = versions.get(versions.size() - 1); citationDate = super.getPublicationDate(); - if (oldest.isPublished()) { - List fms = oldest.getFileMetadatas(); - for (FileMetadata fm : fms) { - Embargo embargo = fm.getDataFile().getEmbargo(); - if (embargo != null) { - Timestamp embDate = Timestamp.valueOf(embargo.getDateAvailable().atStartOfDay()); - if (citationDate.compareTo(embDate) < 0) { - citationDate = embDate; - } - } + if (embargoCitationDate != 
null) { + if (citationDate.compareTo(embargoCitationDate) < 0) { + return embargoCitationDate; } } } @@ -862,6 +880,12 @@ public String getHarvestingDescription() { return null; } + public boolean hasEnabledGuestbook(){ + Guestbook gb = this.getGuestbook(); + + return ( gb != null && gb.isEnabled()); + } + @Override public boolean equals(Object object) { // TODO: Warning - this method won't work in the case the id fields are not set diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index 1621b80df55..22bad42df96 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -112,8 +112,8 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String geographicUnit="geographicUnit"; public final static String westLongitude="westLongitude"; public final static String eastLongitude="eastLongitude"; - public final static String northLatitude="northLongitude"; //Changed to match DB - incorrectly entered into DB: https://github.com/IQSS/dataverse/issues/5645 - public final static String southLatitude="southLongitude"; //Incorrect in DB: https://github.com/IQSS/dataverse/issues/5645 + public final static String northLatitude="northLatitude"; + public final static String southLatitude="southLatitude"; public final static String unitOfAnalysis="unitOfAnalysis"; public final static String universe="universe"; public final static String kindOfData="kindOfData"; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldDefaultValue.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldDefaultValue.java index 7746099818e..8ac98500890 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldDefaultValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldDefaultValue.java @@ -113,7 +113,7 @@ public int hashCode() { @Override public boolean equals(Object object) { - if (!(object instanceof DatasetField)) { + if (!(object instanceof DatasetFieldDefaultValue)) { return false; } DatasetFieldDefaultValue other = (DatasetFieldDefaultValue) object; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index 620d4bf3e09..6223cd83773 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -19,6 +19,8 @@ import jakarta.ejb.EJB; import jakarta.ejb.Stateless; +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; import jakarta.inject.Named; import jakarta.json.Json; import jakarta.json.JsonArray; @@ -34,6 +36,7 @@ import jakarta.persistence.NoResultException; import jakarta.persistence.NonUniqueResultException; import jakarta.persistence.PersistenceContext; +import jakarta.persistence.PersistenceException; import jakarta.persistence.TypedQuery; import org.apache.commons.codec.digest.DigestUtils; @@ -46,7 +49,6 @@ import org.apache.http.impl.client.HttpClients; import org.apache.http.protocol.HttpContext; import org.apache.http.util.EntityUtils; - import edu.harvard.iq.dataverse.settings.SettingsServiceBean; /** @@ -448,6 +450,7 @@ public JsonObject getExternalVocabularyValue(String termUri) { * @param cvocEntry - the configuration for the DatasetFieldType associated with this term * @param term - the term uri as a string */ + 
@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void registerExternalTerm(JsonObject cvocEntry, String term) { String retrievalUri = cvocEntry.getString("retrieval-uri"); String prefix = cvocEntry.getString("prefix", null); @@ -500,7 +503,8 @@ public void process(HttpResponse response, HttpContext context) throws HttpExcep .setRetryHandler(new DefaultHttpRequestRetryHandler(3, false)) .build()) { HttpGet httpGet = new HttpGet(retrievalUri); - httpGet.addHeader("Accept", "application/json+ld, application/json"); + //application/json+ld is for backward compatibility + httpGet.addHeader("Accept", "application/ld+json, application/json+ld, application/json"); HttpResponse response = httpClient.execute(httpGet); String data = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); @@ -517,6 +521,8 @@ public void process(HttpResponse response, HttpContext context) throws HttpExcep logger.fine("Wrote value for term: " + term); } catch (JsonException je) { logger.severe("Error retrieving: " + retrievalUri + " : " + je.getMessage()); + } catch (PersistenceException e) { + logger.fine("Problem persisting: " + retrievalUri + " : " + e.getMessage()); } } else { logger.severe("Received response code : " + statusCode + " when retrieving " + retrievalUri diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java index 824b486a42d..01785359e0e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java @@ -284,7 +284,7 @@ public void setDisplayOnCreate(boolean displayOnCreate) { } public boolean isControlledVocabulary() { - return controlledVocabularyValues != null && !controlledVocabularyValues.isEmpty(); + return allowControlledVocabulary; } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index b6c21014f04..610bb70ff49 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -8,9 +8,7 @@ import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; -import java.util.GregorianCalendar; +import java.util.*; import java.util.logging.Logger; import java.util.regex.Pattern; import jakarta.validation.ConstraintValidator; @@ -34,7 +32,6 @@ public void initialize(ValidateDatasetFieldType constraintAnnotation) { } public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext context) { - context.disableDefaultConstraintViolation(); // we do this so we can have different messages depending on the different issue boolean lengthOnly = false; @@ -55,6 +52,38 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte return true; } + // verify no junk in individual fields and values are within range + if (dsfType.getName() != null && (dsfType.getName().equals(DatasetFieldConstant.northLatitude) || dsfType.getName().equals(DatasetFieldConstant.southLatitude) || + dsfType.getName().equals(DatasetFieldConstant.westLongitude) || dsfType.getName().equals(DatasetFieldConstant.eastLongitude))) { + try { + verifyBoundingBoxCoordinatesWithinRange(dsfType.getName(), value.getValue()); + } catch (IllegalArgumentException iae) { + try { + 
context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidEntry")).addConstraintViolation(); + } catch (NullPointerException e) { + } + return false; + } + } + + // validate fields that are siblings and depend on each others values + if (value.getDatasetField().getParentDatasetFieldCompoundValue() != null && + value.getDatasetField().getParentDatasetFieldCompoundValue().getParentDatasetField().getValidationMessage() == null) { + Optional failureMessage = validateChildConstraints(value.getDatasetField()); + if (failureMessage.isPresent()) { + try { + context.buildConstraintViolationWithTemplate(dsfType.getParentDatasetFieldType().getDisplayName() + " " + + BundleUtil.getStringFromBundle(failureMessage.get()) ).addConstraintViolation(); + + // save the failure message in the parent so we don't keep validating the children + value.getDatasetField().getParentDatasetFieldCompoundValue().getParentDatasetField().setValidationMessage(failureMessage.get()); + + } catch (NullPointerException npe) { + } + return false; + } + } + if (fieldType.equals(FieldType.TEXT) && !lengthOnly && value.getDatasetField().getDatasetFieldType().getValidationFormat() != null) { boolean valid = value.getValue().matches(value.getDatasetField().getDatasetFieldType().getValidationFormat()); if (!valid) { @@ -216,4 +245,60 @@ public boolean isValidAuthorIdentifier(String userInput, Pattern pattern) { return pattern.matcher(userInput).matches(); } + // Validate child fields against each other and return failure message or Optional.empty() if success + public Optional validateChildConstraints(DatasetField dsf) { + final String fieldName = dsf.getDatasetFieldType().getName() != null ? dsf.getDatasetFieldType().getName() : ""; + Optional returnFailureMessage = Optional.empty(); + + // Validate Child Constraint for Geospatial Bounding Box + // validate the four points of the box to insure proper layout + if (fieldName.equals(DatasetFieldConstant.northLatitude) || fieldName.equals(DatasetFieldConstant.westLongitude) + || fieldName.equals(DatasetFieldConstant.eastLongitude) || fieldName.equals(DatasetFieldConstant.southLatitude)) { + final String failureMessage = "dataset.metadata.invalidGeospatialCoordinates"; + + try { + final Map coords = new HashMap<>(); + dsf.getParentDatasetFieldCompoundValue().getChildDatasetFields().forEach(f -> { + coords.put(f.getDatasetFieldType().getName(), f.getValue()); + }); + if (!validateBoundingBox(coords.get(DatasetFieldConstant.westLongitude), + coords.get(DatasetFieldConstant.eastLongitude), + coords.get(DatasetFieldConstant.northLatitude), + coords.get(DatasetFieldConstant.southLatitude))) { + returnFailureMessage = Optional.of(failureMessage); + } + } catch (IllegalArgumentException e) { // IllegalArgumentException NumberFormatException + returnFailureMessage = Optional.of(failureMessage); + } + } + + return returnFailureMessage; + } + + public static boolean validateBoundingBox(final String westLon, final String eastLon, final String northLat, final String southLat) { + boolean returnVal = false; + + try { + Float west = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.westLongitude, westLon); + Float east = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.eastLongitude, eastLon); + Float north = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.northLatitude, northLat); + Float south = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.southLatitude, southLat); + 
returnVal = west <= east && south <= north; + } catch (IllegalArgumentException e) { + returnVal = false; + } + + return returnVal; + } + + private static Float verifyBoundingBoxCoordinatesWithinRange(final String name, final String value) throws IllegalArgumentException { + int max = name.equals(DatasetFieldConstant.westLongitude) || name.equals(DatasetFieldConstant.eastLongitude) ? 180 : 90; + int min = max * -1; + + final Float returnVal = value != null ? Float.parseFloat(value) : Float.NaN; + if (returnVal.isNaN() || returnVal < min || returnVal > max) { + throw new IllegalArgumentException(String.format("Value (%s) not in range (%s-%s)", returnVal.isNaN() ? "missing" : returnVal, min, max)); + } + return returnVal; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index d20175b6e1a..0641039e433 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -11,6 +11,9 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; @@ -40,7 +43,10 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.metadataimport.ForeignMetadataImportServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteDOIProvider; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrlUtil; @@ -361,6 +367,8 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { * other boolean. */ private boolean versionHasTabular = false; + private boolean versionHasGlobus = false; + private boolean globusTransferRequested = false; private boolean showIngestSuccess; @@ -376,6 +384,19 @@ public void setShowIngestSuccess(boolean showIngestSuccess) { this.showIngestSuccess = showIngestSuccess; } + private String termsGuestbookPopupAction = ""; + + public void setTermsGuestbookPopupAction(String popupAction){ + if(popupAction != null && popupAction.length() > 0){ + this.termsGuestbookPopupAction = popupAction; + } + + } + + public String getTermsGuestbookPopupAction(){ + return termsGuestbookPopupAction; + } + // TODO: Consider renaming "configureTools" to "fileConfigureTools". List configureTools = new ArrayList<>(); // TODO: Consider renaming "exploreTools" to "fileExploreTools". 
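A brief illustration of the guard in the setTermsGuestbookPopupAction()/getTermsGuestbookPopupAction() pair added above: null and empty strings are ignored, so the last non-empty action is kept. The directly constructed page instance and the "downloadFile" action string are assumptions made for the example, not values taken from this patch.

    // Illustrative sketch only; DatasetPage is normally a JSF managed bean.
    DatasetPage page = new DatasetPage();
    page.setTermsGuestbookPopupAction("downloadFile"); // stored
    page.setTermsGuestbookPopupAction(null);           // ignored by the null/length guard
    page.setTermsGuestbookPopupAction("");             // ignored as well
    assert "downloadFile".equals(page.getTermsGuestbookPopupAction());
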
@@ -391,6 +412,9 @@ public void setShowIngestSuccess(boolean showIngestSuccess) { Map> fileQueryToolsByFileId = new HashMap<>(); List fileQueryTools = new ArrayList<>(); private List datasetExploreTools; + private List datasetConfigureTools; + // The selected dataset-level configure tool + private ExternalTool datasetConfigureTool; public Boolean isHasRsyncScript() { return hasRsyncScript; @@ -490,7 +514,7 @@ public String getThumbnailString() { thumbnailString = datasetThumbnail.getBase64image(); } else { - thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsBase64Url(dataset, + thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsUrl(dataset, workingVersion.getId(), !workingVersion.isDraft(), ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE); @@ -685,6 +709,16 @@ public void setNumberOfFilesToShow(Long numberOfFilesToShow) { this.numberOfFilesToShow = numberOfFilesToShow; } + private String returnReason = ""; + + public String getReturnReason() { + return returnReason; + } + + public void setReturnReason(String returnReason) { + this.returnReason = returnReason; + } + public void showAll(){ setNumberOfFilesToShow(new Long(fileMetadatasSearch.size())); } @@ -738,17 +772,29 @@ public boolean isIndexedVersion() { if (isIndexedVersion != null) { return isIndexedVersion; } + + // Just like on the collection page, facets on the Dataset page can be + // disabled instance-wide by an admin: + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacets, false)) { + return isIndexedVersion = false; + } + // The version is SUPPOSED to be indexed if it's the latest published version, or a - // draft. So if none of the above is true, we return false right away: - + // draft. So if none of the above is true, we can return false right away. if (!(workingVersion.isDraft() || isThisLatestReleasedVersion())) { return isIndexedVersion = false; } - - // ... but if it is the latest published version or a draft, we want to test - // and confirm that this version *has* actually been indexed and is searchable - // (and that solr is actually up and running!), by running a quick solr search: - return isIndexedVersion = isThisVersionSearchable(); + // If this is the latest published version, we want to confirm that this + // version was successfully indexed after the last publication + + if (isThisLatestReleasedVersion()) { + return isIndexedVersion = (workingVersion.getDataset().getIndexTime() != null) + && workingVersion.getDataset().getIndexTime().after(workingVersion.getReleaseTime()); + } + + // Drafts don't have the indextime stamps set/incremented when indexed, + // so we'll just assume it is indexed, and will then hope for the best. + return isIndexedVersion = true; } /** @@ -804,8 +850,18 @@ public List getFileTagsFacetLabels() { /** * Verifies that solr is running and that the version is indexed and searchable * @return boolean - */ + * Commenting out this method for now, since we have decided it was not + * necessary, to query solr just to figure out if we can query solr. We will + * rely solely on the latest-relesed status and the indexed timestamp from + * the database for that. - L.A. 
+ * public boolean isThisVersionSearchable() { + // Just like on the collection page, facets on the Dataset page can be + // disabled instance-wide by an admin: + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacets, false)) { + return false; + } + SolrQuery solrQuery = new SolrQuery(); solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, workingVersion.getDataset().getId().toString())); @@ -840,6 +896,7 @@ public boolean isThisVersionSearchable() { return false; } + */ /** * Finds the list of numeric datafile ids in the Version specified, by running @@ -951,10 +1008,19 @@ public Set getFileIdsInVersionFromSolr(Long datasetVersionId, String patte logger.fine("Remote Solr Exception: " + ex.getLocalizedMessage()); String msg = ex.getLocalizedMessage(); if (msg.contains(SearchFields.FILE_DELETED)) { + // This is a backward compatibility hook put in place many versions + // ago, to accommodate instances running Solr with schemas that + // don't include this flag yet. Running Solr with an up-to-date + // schema has been a hard requirement for a while now; should we + // remove it at this point? - L.A. fileDeletedFlagNotIndexed = true; + } else { + isIndexedVersion = false; + return resultIds; } } catch (Exception ex) { logger.warning("Solr exception: " + ex.getLocalizedMessage()); + isIndexedVersion = false; return resultIds; } @@ -967,6 +1033,7 @@ public Set getFileIdsInVersionFromSolr(Long datasetVersionId, String patte queryResponse = solrClientService.getSolrClient().query(solrQuery); } catch (Exception ex) { logger.warning("Caught a Solr exception (again!): " + ex.getLocalizedMessage()); + isIndexedVersion = false; return resultIds; } } @@ -1870,9 +1937,6 @@ private String init(boolean initFull) { guestbookResponse = new GuestbookResponse(); - String nonNullDefaultIfKeyNotFound = ""; - protocol = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - authority = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); String sortOrder = getSortOrder(); if(sortOrder != null) { FileMetadata.setCategorySortOrder(sortOrder); @@ -2054,8 +2118,6 @@ private String init(boolean initFull) { editMode = EditMode.CREATE; selectedHostDataverse = dataverseService.find(ownerId); dataset.setOwner(selectedHostDataverse); - dataset.setProtocol(protocol); - dataset.setAuthority(authority); if (dataset.getOwner() == null) { return permissionsWrapper.notFound(); @@ -2065,9 +2127,9 @@ private String init(boolean initFull) { //Wait until the create command before actually getting an identifier, except if we're using directUpload //Need to assign an identifier prior to calls to requestDirectUploadUrl if direct upload is used. 
if ( isEmpty(dataset.getIdentifier()) && systemConfig.directUploadEnabled(dataset) ) { - CommandContext ctxt = commandEngine.getContext(); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); - dataset.setIdentifier(idServiceBean.generateDatasetIdentifier(dataset)); + CommandContext ctxt = commandEngine.getContext(); + PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(dataset); + pidProvider.generatePid(dataset); } dataverseTemplates.addAll(dataverseService.find(ownerId).getTemplates()); if (!dataverseService.find(ownerId).isTemplateRoot()) { @@ -2134,10 +2196,19 @@ private String init(boolean initFull) { // the total "originals" size of the dataset with direct custom queries; // then we'll be able to drop the lookup hint for DataTable from the // findDeep() method for the version and further speed up the lookup - // a little bit. + // a little bit. + boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); for (FileMetadata fmd : workingVersion.getFileMetadatas()) { - if (fmd.getDataFile().isTabularData()) { + DataFile df = fmd.getDataFile(); + if (df.isTabularData()) { versionHasTabular = true; + } + if(globusDownloadEnabled) { + if(GlobusAccessibleStore.isGlobusAccessible(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) { + versionHasGlobus= true; + } + } + if(versionHasTabular &&(!globusDownloadEnabled || versionHasGlobus)) { break; } } @@ -2153,6 +2224,7 @@ private String init(boolean initFull) { previewTools = externalToolService.findFileToolsByType(ExternalTool.Type.PREVIEW); fileQueryTools = externalToolService.findFileToolsByType(ExternalTool.Type.QUERY); datasetExploreTools = externalToolService.findDatasetToolsByType(ExternalTool.Type.EXPLORE); + datasetConfigureTools = externalToolService.findDatasetToolsByType(ExternalTool.Type.CONFIGURE); rowsPerPage = 10; if (dataset.getId() != null && canUpdateDataset()) { hasRestrictedFiles = workingVersion.isHasRestrictedFile(); @@ -2262,14 +2334,17 @@ private void displayLockInfo(Dataset dataset) { lockedDueToIngestVar = true; } - // With DataCite, we try to reserve the DOI when the dataset is created. Sometimes this - // fails because DataCite is down. We show the message below to set expectations that the - // "Publish" button won't work until the DOI has been reserved using the "Reserve PID" API. - if (settingsWrapper.isDataCiteInstallation() && dataset.getGlobalIdCreateTime() == null && editMode != EditMode.CREATE) { - JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("dataset.locked.pidNotReserved.message"), - BundleUtil.getStringFromBundle("dataset.locked.pidNotReserved.message.details")); + if (dataset.getGlobalIdCreateTime() == null && editMode != EditMode.CREATE) { + // With DataCite, we try to reserve the DOI when the dataset is created. Sometimes this + // fails because DataCite is down. We show the message below to set expectations that the + // "Publish" button won't work until the DOI has been reserved using the "Reserve PID" API. 
+ PidProvider pidProvider = PidUtil.getPidProvider(dataset.getGlobalId().getProviderId()); + if (DataCiteDOIProvider.TYPE.equals(pidProvider.getProviderType())) { + JH.addMessage(FacesMessage.SEVERITY_WARN, + BundleUtil.getStringFromBundle("dataset.locked.pidNotReserved.message"), + BundleUtil.getStringFromBundle("dataset.locked.pidNotReserved.message.details")); + } } - //if necessary refresh publish message also displayPublishMessage(); @@ -2433,6 +2508,10 @@ private DefaultTreeNode createFileTreeNode(FileMetadata fileMetadata, TreeNode p public boolean isVersionHasTabular() { return versionHasTabular; } + + public boolean isVersionHasGlobus() { + return versionHasGlobus; + } public boolean isReadOnly() { return readOnly; @@ -2584,8 +2663,7 @@ public void edit(EditMode editMode) { public String sendBackToContributor() { try { - //FIXME - Get Return Comment from sendBackToContributor popup - Command cmd = new ReturnDatasetToAuthorCommand(dvRequestService.getDataverseRequest(), dataset, ""); + Command cmd = new ReturnDatasetToAuthorCommand(dvRequestService.getDataverseRequest(), dataset, returnReason); dataset = commandEngine.submit(cmd); JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataset.reject.success")); } catch (CommandException ex) { @@ -2863,6 +2941,12 @@ public void sort() { public String refresh() { logger.fine("refreshing"); + //In v5.14, versionId was null here. In 6.0, it appears not to be. + //This check is to handle the null if it reappears/occurs under other circumstances + if(versionId==null) { + logger.warning("versionId was null in refresh"); + versionId = workingVersion.getId(); + } //dataset = datasetService.find(dataset.getId()); dataset = null; workingVersion = null; @@ -2872,10 +2956,9 @@ public String refresh() { DatasetVersionServiceBean.RetrieveDatasetVersionResponse retrieveDatasetVersionResponse = null; if (versionId != null) { - // versionId must have been set by now, in the init() method, - // regardless of how the page was originally called - by the dataset - // database id, by the persistent identifier, or by the db id of - // the version. + // versionId must have been set by now (see null check above), in the init() + // method, regardless of how the page was originally called - by the dataset + // database id, by the persistent identifier, or by the db id of the version. 
this.workingVersion = datasetVersionService.findDeep(versionId); dataset = workingVersion.getDataset(); } @@ -3034,6 +3117,26 @@ public void setSelectedNonDownloadableFiles(List selectedNonDownlo this.selectedNonDownloadableFiles = selectedNonDownloadableFiles; } + private List selectedGlobusTransferableFiles; + + public List getSelectedGlobusTransferableFiles() { + return selectedGlobusTransferableFiles; + } + + public void setSelectedGlobusTransferableFiles(List selectedGlobusTransferableFiles) { + this.selectedGlobusTransferableFiles = selectedGlobusTransferableFiles; + } + + private List selectedNonGlobusTransferableFiles; + + public List getSelectedNonGlobusTransferableFiles() { + return selectedNonGlobusTransferableFiles; + } + + public void setSelectedNonGlobusTransferableFiles(List selectedNonGlobusTransferableFiles) { + this.selectedNonGlobusTransferableFiles = selectedNonGlobusTransferableFiles; + } + public String getSizeOfDataset() { return DatasetUtil.getDownloadSize(workingVersion, false); } @@ -3143,9 +3246,9 @@ public void startDownloadSelectedOriginal() { private void startDownload(boolean downloadOriginal){ boolean guestbookRequired = isDownloadPopupRequired(); - boolean validate = validateFilesForDownload(guestbookRequired, downloadOriginal); + boolean validate = validateFilesForDownload(downloadOriginal); if (validate) { - updateGuestbookResponse(guestbookRequired, downloadOriginal); + updateGuestbookResponse(guestbookRequired, downloadOriginal, false); if(!guestbookRequired && !getValidateFilesOutcome().equals("Mixed")){ startMultipleFileDownload(); } @@ -3166,9 +3269,14 @@ public void setValidateFilesOutcome(String validateFilesOutcome) { this.validateFilesOutcome = validateFilesOutcome; } - public boolean validateFilesForDownload(boolean guestbookRequired, boolean downloadOriginal) { - setSelectedDownloadableFiles(new ArrayList<>()); - setSelectedNonDownloadableFiles(new ArrayList<>()); + public boolean validateFilesForDownload(boolean downloadOriginal){ + if (this.selectedFiles.isEmpty()) { + PrimeFaces.current().executeScript("PF('selectFilesForDownload').show()"); + return false; + } else { + this.filterSelectedFiles(); + } + //assume Pass unless something bad happens setValidateFilesOutcome("Pass"); Long bytes = (long) 0; @@ -3178,22 +3286,17 @@ public boolean validateFilesForDownload(boolean guestbookRequired, boolean downl return false; } - for (FileMetadata fmd : this.selectedFiles) { - if (this.fileDownloadHelper.canDownloadFile(fmd)) { - getSelectedDownloadableFiles().add(fmd); - DataFile dataFile = fmd.getDataFile(); - if (downloadOriginal && dataFile.isTabularData()) { - bytes += dataFile.getOriginalFileSize() == null ? 0 : dataFile.getOriginalFileSize(); - } else { - bytes += dataFile.getFilesize(); - } + for (FileMetadata fmd : getSelectedDownloadableFiles()) { + DataFile dataFile = fmd.getDataFile(); + if (downloadOriginal && dataFile.isTabularData()) { + bytes += dataFile.getOriginalFileSize() == null ? 
0 : dataFile.getOriginalFileSize(); } else { - getSelectedNonDownloadableFiles().add(fmd); + bytes += dataFile.getFilesize(); } } - //if there are two or more files with a total size - //over the zip limit post a "too large" popup + //if there are two or more files, with a total size + //over the zip limit, post a "too large" popup if (bytes > settingsWrapper.getZipDownloadLimit() && selectedDownloadableFiles.size() > 1) { setValidateFilesOutcome("FailSize"); return false; @@ -3202,41 +3305,126 @@ public boolean validateFilesForDownload(boolean guestbookRequired, boolean downl // If some of the files were restricted and we had to drop them off the // list, and NONE of the files are left on the downloadable list // - we show them a "you're out of luck" popup: - if (getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { + if (getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { setValidateFilesOutcome("FailRestricted"); return false; } - if (!getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { + //Some are selected and there are non-downloadable ones or there are both downloadable and globus transferable files + if ((!(getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty()) + && (!getSelectedNonDownloadableFiles().isEmpty()) || (!getSelectedDownloadableFiles().isEmpty() && !getSelectedGlobusTransferableFiles().isEmpty()))) { setValidateFilesOutcome("Mixed"); return true; } - - if (guestbookRequired) { + //ToDo - should Mixed not trigger this? + if (isTermsPopupRequired() || isGuestbookPopupRequiredAtDownload()) { setValidateFilesOutcome("GuestbookRequired"); } - return true; } - private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal) { + private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal, boolean isGlobusTransfer) { // Note that the GuestbookResponse object may still have information from // the last download action performed by the user. For example, it may // still have the non-null Datafile in it, if the user has just downloaded // a single file; or it may still have the format set to "original" - // even if that's not what they are trying to do now. 
// So make sure to reset these values: - guestbookResponse.setDataFile(null); - guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString()); + if(fileMetadataForAction == null) { + guestbookResponse.setDataFile(null); + } else { + guestbookResponse.setDataFile(fileMetadataForAction.getDataFile()); + } + if(isGlobusTransfer) { + guestbookResponse.setSelectedFileIds(getFilesIdsString(getSelectedGlobusTransferableFiles())); + } else { + guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString()); + } if (downloadOriginal) { guestbookResponse.setFileFormat("original"); } else { guestbookResponse.setFileFormat(""); } - guestbookResponse.setDownloadtype("Download"); + guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); + } + + /*helper function to filter the selected files into , + and and for reuse*/ + + /*helper function to filter the selected files into , + and and for reuse*/ + + private boolean filterSelectedFiles(){ + setSelectedDownloadableFiles(new ArrayList<>()); + setSelectedNonDownloadableFiles(new ArrayList<>()); + setSelectedRestrictedFiles(new ArrayList<>()); + setSelectedUnrestrictedFiles(new ArrayList<>()); + setSelectedGlobusTransferableFiles(new ArrayList<>()); + setSelectedNonGlobusTransferableFiles(new ArrayList<>()); + + boolean someFiles = false; + boolean globusDownloadEnabled = settingsWrapper.isGlobusDownload(); + for (FileMetadata fmd : this.selectedFiles){ + boolean downloadable=this.fileDownloadHelper.canDownloadFile(fmd); + + boolean globusTransferable = false; + if(globusDownloadEnabled) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId); + downloadable = downloadable && !AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(driverId); + } + if(downloadable){ + getSelectedDownloadableFiles().add(fmd); + someFiles=true; + } else { + getSelectedNonDownloadableFiles().add(fmd); + } + if(globusTransferable) { + getSelectedGlobusTransferableFiles().add(fmd); + someFiles=true; + } else { + getSelectedNonGlobusTransferableFiles().add(fmd); + } + if(fmd.isRestricted()){ + getSelectedRestrictedFiles().add(fmd); //might be downloadable to user or not + someFiles=true; + } else { + getSelectedUnrestrictedFiles().add(fmd); + someFiles=true; + } + + } + return someFiles; } + public void validateFilesForRequestAccess(){ + this.filterSelectedFiles(); + + if(!dataset.isFileAccessRequest()){ //is this needed? 
wouldn't be able to click Request Access if this !isFileAccessRequest() + return; + } + + if(!this.selectedRestrictedFiles.isEmpty()){ + ArrayList nonDownloadableRestrictedFiles = new ArrayList<>(); + + List userRequestedDataFiles = ((AuthenticatedUser) session.getUser()).getRequestedDataFiles(); + + for(FileMetadata fmd : this.selectedRestrictedFiles){ + if(!this.fileDownloadHelper.canDownloadFile(fmd) && !userRequestedDataFiles.contains(fmd.getDataFile())){ + nonDownloadableRestrictedFiles.add(fmd); + } + } + + if(!nonDownloadableRestrictedFiles.isEmpty()){ + guestbookResponse.setDataFile(null); + guestbookResponse.setSelectedFileIds(this.getFilesIdsString(nonDownloadableRestrictedFiles)); + this.requestAccessMultipleFiles(); + } else { + //popup select data files + } + } + } private boolean selectAllFiles; @@ -3262,26 +3450,23 @@ public void toggleAllSelected(){ // helper Method public String getSelectedFilesIdsString() { - String downloadIdString = ""; - for (FileMetadata fmd : this.selectedFiles){ - if (!StringUtil.isEmpty(downloadIdString)) { - downloadIdString += ","; - } - downloadIdString += fmd.getDataFile().getId(); - } - return downloadIdString; + return this.getFilesIdsString(this.selectedFiles); } - + // helper Method public String getSelectedDownloadableFilesIdsString() { - String downloadIdString = ""; - for (FileMetadata fmd : this.selectedDownloadableFiles){ - if (!StringUtil.isEmpty(downloadIdString)) { - downloadIdString += ","; + return this.getFilesIdsString(this.selectedDownloadableFiles); + } + + public String getFilesIdsString(List fileMetadatas){ //for reuse + String idString = ""; + for (FileMetadata fmd : fileMetadatas){ + if (!StringUtil.isEmpty(idString)) { + idString += ","; } - downloadIdString += fmd.getDataFile().getId(); + idString += fmd.getDataFile().getId(); } - return downloadIdString; + return idString; } @@ -3319,7 +3504,7 @@ public void saveLinkingDataverses(ActionEvent evt) { FacesMessage message = new FacesMessage(FacesMessage.SEVERITY_INFO, BundleUtil.getStringFromBundle("dataset.notlinked"), linkingDataverseErrorMessage); FacesContext.getCurrentInstance().addMessage(null, message); } - + alreadyLinkedDataverses = null; //force update to list of linked dataverses } private String linkingDataverseErrorMessage = ""; @@ -3355,7 +3540,23 @@ private Boolean saveLink(Dataverse dataverse){ } return retVal; } - + + private String alreadyLinkedDataverses = null; + + public String getAlreadyLinkedDataverses(){ + if (alreadyLinkedDataverses != null) { + return alreadyLinkedDataverses; + } + List dataverseList = dataverseService.findDataversesThatLinkToThisDatasetId(dataset.getId()); + for (Dataverse dv: dataverseList){ + if (alreadyLinkedDataverses == null){ + alreadyLinkedDataverses = dv.getCurrentName(); + } else { + alreadyLinkedDataverses = alreadyLinkedDataverses + ", " + dv.getCurrentName(); + } + } + return alreadyLinkedDataverses; + } public List completeLinkingDataverse(String query) { dataset = datasetService.find(dataset.getId()); @@ -5097,7 +5298,7 @@ public boolean isFileAccessRequestMultiButtonRequired(){ for (FileMetadata fmd : workingVersion.getFileMetadatas()){ AuthenticatedUser authenticatedUser = (AuthenticatedUser) session.getUser(); //Change here so that if all restricted files have pending requests there's no Request Button - if ((!this.fileDownloadHelper.canDownloadFile(fmd) && !fmd.getDataFile().containsFileAccessRequestFromUser(authenticatedUser))) { + if ((!this.fileDownloadHelper.canDownloadFile(fmd) && 
!fmd.getDataFile().containsActiveFileAccessRequestFromUser(authenticatedUser))) { return true; } } @@ -5108,6 +5309,9 @@ public boolean isFileAccessRequestMultiButtonEnabled(){ if (!isSessionUserAuthenticated() || !dataset.isFileAccessRequest()){ return false; } + //populate file lists + filterSelectedFiles(); + if( this.selectedRestrictedFiles == null || this.selectedRestrictedFiles.isEmpty() ){ return false; } @@ -5119,35 +5323,6 @@ public boolean isFileAccessRequestMultiButtonEnabled(){ return false; } - private Boolean downloadButtonAllEnabled = null; - - public boolean isDownloadAllButtonEnabled() { - - if (downloadButtonAllEnabled == null) { - for (FileMetadata fmd : workingVersion.getFileMetadatas()) { - if (!this.fileDownloadHelper.canDownloadFile(fmd)) { - downloadButtonAllEnabled = false; - break; - } - } - downloadButtonAllEnabled = true; - } - return downloadButtonAllEnabled; - } - - public boolean isDownloadSelectedButtonEnabled(){ - - if( this.selectedFiles == null || this.selectedFiles.isEmpty() ){ - return false; - } - for (FileMetadata fmd : this.selectedFiles){ - if (this.fileDownloadHelper.canDownloadFile(fmd)){ - return true; - } - } - return false; - } - public boolean isFileAccessRequestMultiSignUpButtonRequired(){ if (isSessionUserAuthenticated()){ return false; @@ -5190,7 +5365,24 @@ public boolean isDownloadPopupRequired() { public boolean isRequestAccessPopupRequired() { return FileUtil.isRequestAccessPopupRequired(workingVersion); } + + public boolean isGuestbookAndTermsPopupRequired() { + return FileUtil.isGuestbookAndTermsPopupRequired(workingVersion); + } + public boolean isGuestbookPopupRequired(){ + return FileUtil.isGuestbookPopupRequired(workingVersion); + } + + public boolean isTermsPopupRequired(){ + return FileUtil.isTermsPopupRequired(workingVersion); + } + + public boolean isGuestbookPopupRequiredAtDownload(){ + // Only show guestbookAtDownload if guestbook at request is disabled (legacy behavior) + return isGuestbookPopupRequired() && !workingVersion.getDataset().getEffectiveGuestbookEntryAtRequest(); + } + public String requestAccessMultipleFiles() { if (selectedFiles.isEmpty()) { @@ -5205,11 +5397,11 @@ public String requestAccessMultipleFiles() { for (FileMetadata fmd : selectedFiles){ fileDownloadHelper.addMultipleFilesForRequestAccess(fmd.getDataFile()); } - if (isRequestAccessPopupRequired()) { + if (isGuestbookAndTermsPopupRequired()) { //RequestContext requestContext = RequestContext.getCurrentInstance(); - PrimeFaces.current().executeScript("PF('requestAccessPopup').show()"); + PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').show()"); //the popup will call writeGuestbookAndRequestAccess(); return ""; - } else { + }else { //No popup required fileDownloadHelper.requestAccessIndirect(); return ""; @@ -5394,6 +5586,14 @@ public FileDownloadServiceBean getFileDownloadService() { public void setFileDownloadService(FileDownloadServiceBean fileDownloadService) { this.fileDownloadService = fileDownloadService; } + + public FileDownloadHelper getFileDownloadHelper() { + return fileDownloadHelper; + } + + public void setFileDownloadHelper(FileDownloadHelper fileDownloadHelper) { + this.fileDownloadHelper = fileDownloadHelper; + } public GuestbookResponseServiceBean getGuestbookResponseService() { @@ -5572,6 +5772,18 @@ public List getDatasetExploreTools() { return datasetExploreTools; } + public List getDatasetConfigureTools() { + return datasetConfigureTools; + } + + public ExternalTool getDatasetConfigureTool() { + return 
datasetConfigureTool; + } + + public void setDatasetConfigureTool(ExternalTool datasetConfigureTool) { + this.datasetConfigureTool = datasetConfigureTool; + } + Boolean thisLatestReleasedVersion = null; public boolean isThisLatestReleasedVersion() { @@ -5777,16 +5989,20 @@ public void setFolderPresort(boolean folderPresort) { public void explore(ExternalTool externalTool) { ApiToken apiToken = null; User user = session.getUser(); + apiToken = authService.getValidApiTokenForUser(user); + ExternalToolHandler externalToolHandler = new ExternalToolHandler(externalTool, dataset, apiToken, session.getLocaleCode()); + PrimeFaces.current().executeScript(externalToolHandler.getExploreScript()); + } + + public void configure(ExternalTool externalTool) { + ApiToken apiToken = null; + User user = session.getUser(); + //Not enabled for PrivateUrlUsers (who wouldn't have write permissions anyway) if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); + apiToken = authService.getValidApiTokenForAuthenticatedUser((AuthenticatedUser) user); } ExternalToolHandler externalToolHandler = new ExternalToolHandler(externalTool, dataset, apiToken, session.getLocaleCode()); - PrimeFaces.current().executeScript(externalToolHandler.getExploreScript()); + PrimeFaces.current().executeScript(externalToolHandler.getConfigureScript()); } private FileMetadata fileMetadataForAction; @@ -5832,7 +6048,7 @@ public String getEffectiveMetadataLanguage() { } public String getEffectiveMetadataLanguage(boolean ofParent) { String mdLang = ofParent ? dataset.getOwner().getEffectiveMetadataLanguage() : dataset.getEffectiveMetadataLanguage(); - if (mdLang.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) { + if (mdLang.equals(DvObjectContainer.UNDEFINED_CODE)) { mdLang = settingsWrapper.getDefaultMetadataLanguage(); } return mdLang; @@ -5840,7 +6056,7 @@ public String getEffectiveMetadataLanguage(boolean ofParent) { public String getLocaleDisplayName(String code) { String displayName = settingsWrapper.getBaseMetadataLanguageMap(false).get(code); - if(displayName==null && !code.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) { + if(displayName==null && !code.equals(DvObjectContainer.UNDEFINED_CODE)) { //Default (for cases such as :when a Dataset has a metadatalanguage code but :MetadataLanguages is no longer defined). 
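As an aside (not part of the patch): a self-contained sketch of the fallback described in the comment above, using made-up map contents to stand in for the :MetadataLanguages setting and the literal "undefined" for DvObjectContainer.UNDEFINED_CODE.

import java.util.Locale;
import java.util.Map;

public class MetadataLanguageFallbackSketch {
    public static void main(String[] args) {
        // Hypothetical: the instance now only lists "en", but a dataset still stores "fr"
        Map<String, String> baseMetadataLanguageMap = Map.of("en", "English");
        String code = "fr";
        String displayName = baseMetadataLanguageMap.get(code);
        if (displayName == null && !"undefined".equals(code)) {
            // Same fallback as above: let the JDK supply a human-readable name
            displayName = new Locale(code).getDisplayName();
        }
        System.out.println(displayName); // "French" on an English-locale JVM
    }
}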
displayName = new Locale(code).getDisplayName(); } @@ -6107,18 +6323,49 @@ public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } - public void startGlobusTransfer() { - ApiToken apiToken = null; - User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); + public boolean isGlobusTransferRequested() { + return globusTransferRequested; + } + + /** + * Analagous with the startDownload method, this method is called when the user + * tries to start a Globus transfer out (~download). The + * validateFilesForDownload call checks to see if there are some files that can + * be Globus transfered and, if so and there are no files that can't be + * transferre, this method will launch the globus transfer app. If there is a + * mix of files or if the guestbook popup is required, the method passes back to + * the UI so those popup(s) can be shown. Once they are, this method is called + * with the popupShown param true and the app will be shown. + * + * @param transferAll - when called from the dataset Access menu, this should be + * true so that all files are included in the processing. + * When it is called from the file table, the current + * selection is used and the param should be false. + * @param popupShown - This method is called twice if the the mixed files or + * guestbook popups are needed. On the first call, popupShown + * is false so that the transfer is not started and those + * popups can be shown. On the second call, popupShown is + * true and processing will occur as long as there are some + * valid files to transfer. 
+ */ + public void startGlobusTransfer(boolean transferAll, boolean popupShown) { + if (transferAll) { + this.setSelectedFiles(workingVersion.getFileMetadatas()); + } + boolean guestbookRequired = isDownloadPopupRequired(); + + boolean validated = validateFilesForDownload(true); + if (validated) { + globusTransferRequested = true; + boolean mixed = "Mixed".equals(getValidateFilesOutcome()); + // transfer is + updateGuestbookResponse(guestbookRequired, true, true); + if ((!guestbookRequired && !mixed) || popupShown) { + boolean doNotSaveGuestbookResponse = workingVersion.isDraft(); + globusService.writeGuestbookAndStartTransfer(guestbookResponse, doNotSaveGuestbookResponse); + globusTransferRequested = false; + } } - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken)); } public String getWebloaderUrlForDataset(Dataset d) { @@ -6144,7 +6391,10 @@ public String getWebloaderUrlForDataset(Dataset d) { String signpostingLinkHeader = null; public String getSignpostingLinkHeader() { - if (!workingVersion.isReleased()) { + if ((workingVersion==null) || (!workingVersion.isReleased())) { + if(workingVersion==null) { + logger.warning("workingVersion was null in getSignpostingLinkHeader"); + } return null; } if (signpostingLinkHeader == null) { @@ -6155,5 +6405,9 @@ public String getSignpostingLinkHeader() { } return signpostingLinkHeader; } + + public boolean isDOI() { + return AbstractDOIProvider.DOI_PROTOCOL.equals(dataset.getGlobalId().getProtocol()); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 52eb5868c35..a32141b8baf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -19,6 +19,8 @@ import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -61,9 +63,6 @@ public class DatasetServiceBean implements java.io.Serializable { @EJB IndexServiceBean indexService; - @EJB - DOIEZIdServiceBean doiEZIdServiceBean; - @EJB SettingsServiceBean settingsService; @@ -137,7 +136,7 @@ public Dataset findDeep(Object pk) { .setHint("eclipselink.left-join-fetch", "o.files.roleAssignments") .getSingleResult(); } - + public List findByOwnerId(Long ownerId) { return findByOwnerId(ownerId, false); } @@ -788,13 +787,13 @@ public void exportDataset(Dataset dataset, boolean forceReExport) { } } } - + } //get a string to add to save success message //depends on page (dataset/file) and user privleges public String getReminderString(Dataset dataset, boolean canPublishDataset, boolean filePage, boolean isValid) { - + String reminderString; if (canPublishDataset) { @@ -940,80 +939,6 @@ public void callFinalizePublishCommandAsynchronously(Long datasetId, CommandCont } } - /* - Experimental asynchronous method for requesting persistent identifiers for - datafiles. We decided not to run this method on upload/create (so files - will not have persistent ids while in draft; when the draft is published, - we will force obtaining persistent ids for all the files in the version. 
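Not part of the patch, only an illustration of the two-call protocol the Javadoc above describes; the backing-bean reference and the popup wiring are assumptions.

// Hypothetical caller-side sketch (e.g. from a JSF action on the dataset page):

// 1) "Globus Transfer" chosen from the Access menu: consider all files, no popup shown yet.
datasetPage.startGlobusTransfer(true, false);
// If validateFilesForDownload() finds a mix of transferable and non-transferable files,
// or a guestbook/terms popup is required, the page only renders the popup(s) here.

// 2) The popup's confirm action calls back with popupShown = true; only now is the
// Globus app launched via globusService.writeGuestbookAndStartTransfer(...).
datasetPage.startGlobusTransfer(true, true);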
- - If we go back to trying to register global ids on create, care will need to - be taken to make sure the asynchronous changes below are not conflicting with - the changes from file ingest (which may be happening in parallel, also - asynchronously). We would also need to lock the dataset (similarly to how - tabular ingest logs the dataset), to prevent the user from publishing the - version before all the identifiers get assigned - otherwise more conflicts - are likely. (It sounds like it would make sense to treat these two tasks - - persistent identifiers for files and ingest - as one post-upload job, so that - they can be run in sequence). -- L.A. Mar. 2018 - */ - @Asynchronous - public void obtainPersistentIdentifiersForDatafiles(Dataset dataset) { - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(dataset.getProtocol(), commandEngine.getContext()); - - //If the Id type is sequential and Dependent then write file idenitifiers outside the command - String datasetIdentifier = dataset.getIdentifier(); - Long maxIdentifier = null; - - if (systemConfig.isDataFilePIDSequentialDependent()) { - maxIdentifier = getMaximumExistingDatafileIdentifier(dataset); - } - - for (DataFile datafile : dataset.getFiles()) { - logger.info("Obtaining persistent id for datafile id=" + datafile.getId()); - - if (datafile.getIdentifier() == null || datafile.getIdentifier().isEmpty()) { - - logger.info("Obtaining persistent id for datafile id=" + datafile.getId()); - - if (maxIdentifier != null) { - maxIdentifier++; - datafile.setIdentifier(datasetIdentifier + "/" + maxIdentifier.toString()); - } else { - datafile.setIdentifier(idServiceBean.generateDataFileIdentifier(datafile)); - } - - if (datafile.getProtocol() == null) { - datafile.setProtocol(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, "")); - } - if (datafile.getAuthority() == null) { - datafile.setAuthority(settingsService.getValueForKey(SettingsServiceBean.Key.Authority, "")); - } - - logger.info("identifier: " + datafile.getIdentifier()); - - String doiRetString; - - try { - logger.log(Level.FINE, "creating identifier"); - doiRetString = idServiceBean.createIdentifier(datafile); - } catch (Throwable e) { - logger.log(Level.WARNING, "Exception while creating Identifier: " + e.getMessage(), e); - doiRetString = ""; - } - - // Check return value to make sure registration succeeded - if (!idServiceBean.registerWhenPublished() && doiRetString.contains(datafile.getIdentifier())) { - datafile.setIdentifierRegistered(true); - datafile.setGlobalIdCreateTime(new Date()); - } - - DataFile merged = em.merge(datafile); - merged = null; - } - - } - } - public long findStorageSize(Dataset dataset) throws IOException { return findStorageSize(dataset, false, GetDatasetStorageSizeCommand.Mode.STORAGE, null); } @@ -1126,5 +1051,5 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo hdLogger.warning("Failed to destroy the dataset"); } } - + } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 93f45bd288e..5fd963f3931 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -68,7 +68,13 @@ query = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.dataset.harvestedFrom IS NULL and o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL" ), @NamedQuery(name = "DatasetVersion.findById", - query = "SELECT o FROM DatasetVersion o LEFT JOIN 
FETCH o.fileMetadatas WHERE o.id=:id")}) + query = "SELECT o FROM DatasetVersion o LEFT JOIN FETCH o.fileMetadatas WHERE o.id=:id"), + @NamedQuery(name = "DatasetVersion.findByDataset", + query = "SELECT o FROM DatasetVersion o WHERE o.dataset.id=:datasetId ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC"), + @NamedQuery(name = "DatasetVersion.findReleasedByDataset", + query = "SELECT o FROM DatasetVersion o WHERE o.dataset.id=:datasetId AND o.versionState=edu.harvard.iq.dataverse.DatasetVersion.VersionState.RELEASED ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC")/*, + @NamedQuery(name = "DatasetVersion.findVersionElements", + query = "SELECT o.id, o.versionState, o.versionNumber, o.minorVersionNumber FROM DatasetVersion o WHERE o.dataset.id=:datasetId ORDER BY o.versionNumber DESC, o.minorVersionNumber DESC")*/}) @Entity diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java new file mode 100644 index 00000000000..99c3c65e3b8 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java @@ -0,0 +1,326 @@ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.FileSearchCriteria.FileAccessStatus; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Tuple; +import jakarta.persistence.TypedQuery; +import jakarta.persistence.criteria.*; + +import java.io.Serializable; +import java.sql.Timestamp; +import java.util.*; + +import static edu.harvard.iq.dataverse.DataFileTag.TagLabelToTypes; + +@Stateless +@Named +public class DatasetVersionFilesServiceBean implements Serializable { + + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; + + /** + * Different criteria to sort the results of FileMetadata queries used in {@link DatasetVersionFilesServiceBean#getFileMetadatas} + */ + public enum FileOrderCriteria { + NameAZ, NameZA, Newest, Oldest, Size, Type + } + + /** + * Mode to base the search in {@link DatasetVersionFilesServiceBean#getFilesDownloadSize(DatasetVersion, FileSearchCriteria, FileDownloadSizeMode)} + *

+     * All: Includes both archival and original sizes for tabular files
+     * Archival: Includes only the archival size for tabular files
+     * Original: Includes only the original size for tabular files
+     *

        + * All the modes include archival sizes for non-tabular files + */ + public enum FileDownloadSizeMode { + All, Original, Archival + } + + /** + * Given a DatasetVersion, returns its total file metadata count + * + * @param datasetVersion the DatasetVersion to access + * @param searchCriteria for counting only files matching this criteria + * @return long value of total file metadata count + */ + public long getFileMetadataCount(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(Long.class); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + criteriaQuery + .select(criteriaBuilder.count(fileMetadataRoot)) + .where(createSearchCriteriaPredicate(datasetVersion, searchCriteria, criteriaBuilder, criteriaQuery, fileMetadataRoot)); + return em.createQuery(criteriaQuery).getSingleResult(); + } + + /** + * Given a DatasetVersion, returns its file metadata count per content type + * + * @param datasetVersion the DatasetVersion to access + * @param searchCriteria for counting only files matching this criteria + * @return Map of file metadata counts per content type + */ + public Map getFileMetadataCountPerContentType(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createTupleQuery(); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + Path contentType = fileMetadataRoot.get("dataFile").get("contentType"); + criteriaQuery + .multiselect(contentType, criteriaBuilder.count(contentType)) + .where(createSearchCriteriaPredicate(datasetVersion, searchCriteria, criteriaBuilder, criteriaQuery, fileMetadataRoot)) + .groupBy(contentType); + return getStringLongMapResultFromQuery(criteriaQuery); + } + + /** + * Given a DatasetVersion, returns its file metadata count per category name + * + * @param datasetVersion the DatasetVersion to access + * @param searchCriteria for counting only files matching this criteria + * @return Map of file metadata counts per category name + */ + public Map getFileMetadataCountPerCategoryName(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createTupleQuery(); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + Root dataFileCategoryRoot = criteriaQuery.from(DataFileCategory.class); + Path categoryName = dataFileCategoryRoot.get("name"); + criteriaQuery + .multiselect(categoryName, criteriaBuilder.count(fileMetadataRoot)) + .where(criteriaBuilder.and( + createSearchCriteriaPredicate(datasetVersion, searchCriteria, criteriaBuilder, criteriaQuery, fileMetadataRoot), + dataFileCategoryRoot.in(fileMetadataRoot.get("fileCategories")))) + .groupBy(categoryName); + return getStringLongMapResultFromQuery(criteriaQuery); + } + + /** + * Given a DatasetVersion, returns its file metadata count per DataFileTag.TagType + * + * @param datasetVersion the DatasetVersion to access + * @param searchCriteria for counting only files matching this criteria + * @return Map of file metadata counts per DataFileTag.TagType + */ + public Map getFileMetadataCountPerTabularTagName(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = 
criteriaBuilder.createTupleQuery(); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + Root dataFileTagRoot = criteriaQuery.from(DataFileTag.class); + Path dataFileTagType = dataFileTagRoot.get("type"); + criteriaQuery + .multiselect(dataFileTagType, criteriaBuilder.count(fileMetadataRoot)) + .where(criteriaBuilder.and( + createSearchCriteriaPredicate(datasetVersion, searchCriteria, criteriaBuilder, criteriaQuery, fileMetadataRoot), + dataFileTagRoot.in(fileMetadataRoot.get("dataFile").get("dataFileTags")))) + .groupBy(dataFileTagType); + List tagNameOccurrences = em.createQuery(criteriaQuery).getResultList(); + Map result = new HashMap<>(); + for (Tuple occurrence : tagNameOccurrences) { + result.put(occurrence.get(0, DataFileTag.TagType.class), occurrence.get(1, Long.class)); + } + return result; + } + + /** + * Given a DatasetVersion, returns its file metadata count per FileAccessStatus + * + * @param datasetVersion the DatasetVersion to access + * @param searchCriteria for counting only files matching this criteria + * @return Map of file metadata counts per FileAccessStatus + */ + public Map getFileMetadataCountPerAccessStatus(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { + Map allCounts = new HashMap<>(); + addAccessStatusCountToTotal(datasetVersion, allCounts, FileAccessStatus.Public, searchCriteria); + addAccessStatusCountToTotal(datasetVersion, allCounts, FileAccessStatus.Restricted, searchCriteria); + addAccessStatusCountToTotal(datasetVersion, allCounts, FileAccessStatus.EmbargoedThenPublic, searchCriteria); + addAccessStatusCountToTotal(datasetVersion, allCounts, FileAccessStatus.EmbargoedThenRestricted, searchCriteria); + return allCounts; + } + + /** + * Returns a FileMetadata list of files in the specified DatasetVersion + * + * @param datasetVersion the DatasetVersion to access + * @param limit for pagination, can be null + * @param offset for pagination, can be null + * @param searchCriteria for retrieving only files matching this criteria + * @param orderCriteria a FileOrderCriteria to order the results + * @return a FileMetadata list from the specified DatasetVersion + */ + public List getFileMetadatas(DatasetVersion datasetVersion, Integer limit, Integer offset, FileSearchCriteria searchCriteria, FileOrderCriteria orderCriteria) { + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(FileMetadata.class); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + criteriaQuery + .select(fileMetadataRoot) + .where(createSearchCriteriaPredicate(datasetVersion, searchCriteria, criteriaBuilder, criteriaQuery, fileMetadataRoot)) + .orderBy(createGetFileMetadatasOrder(criteriaBuilder, orderCriteria, fileMetadataRoot)); + TypedQuery typedQuery = em.createQuery(criteriaQuery); + if (limit != null) { + typedQuery.setMaxResults(limit); + } + if (offset != null) { + typedQuery.setFirstResult(offset); + } + return typedQuery.getResultList(); + } + + /** + * Returns the total download size of all files for a particular DatasetVersion + * + * @param datasetVersion the DatasetVersion to access + * @param searchCriteria for retrieving only files matching this criteria + * @param mode a FileDownloadSizeMode to base the search on + * @return long value of total file download size + */ + public long getFilesDownloadSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria, FileDownloadSizeMode mode) { + return switch (mode) { + case All -> + 
Long.sum(getOriginalTabularFilesSize(datasetVersion, searchCriteria), getArchivalFilesSize(datasetVersion, false, searchCriteria)); + case Original -> + Long.sum(getOriginalTabularFilesSize(datasetVersion, searchCriteria), getArchivalFilesSize(datasetVersion, true, searchCriteria)); + case Archival -> getArchivalFilesSize(datasetVersion, false, searchCriteria); + }; + } + + private void addAccessStatusCountToTotal(DatasetVersion datasetVersion, Map totalCounts, FileAccessStatus dataFileAccessStatus, FileSearchCriteria searchCriteria) { + long fileMetadataCount = getFileMetadataCountByAccessStatus(datasetVersion, dataFileAccessStatus, searchCriteria); + if (fileMetadataCount > 0) { + totalCounts.put(dataFileAccessStatus, fileMetadataCount); + } + } + + private long getFileMetadataCountByAccessStatus(DatasetVersion datasetVersion, FileAccessStatus accessStatus, FileSearchCriteria searchCriteria) { + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(Long.class); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + criteriaQuery + .select(criteriaBuilder.count(fileMetadataRoot)) + .where(criteriaBuilder.and( + createSearchCriteriaAccessStatusPredicate(accessStatus, criteriaBuilder, fileMetadataRoot), + createSearchCriteriaPredicate(datasetVersion, searchCriteria, criteriaBuilder, criteriaQuery, fileMetadataRoot))); + return em.createQuery(criteriaQuery).getSingleResult(); + } + + private Predicate createSearchCriteriaAccessStatusPredicate(FileAccessStatus accessStatus, CriteriaBuilder criteriaBuilder, Root fileMetadataRoot) { + Path dataFile = fileMetadataRoot.get("dataFile"); + Path embargo = dataFile.get("embargo"); + Predicate activelyEmbargoedPredicate = criteriaBuilder.greaterThanOrEqualTo(embargo.get("dateAvailable"), criteriaBuilder.currentDate()); + Predicate inactivelyEmbargoedPredicate = criteriaBuilder.isNull(embargo); + Path isRestricted = dataFile.get("restricted"); + Predicate isRestrictedPredicate = criteriaBuilder.isTrue(isRestricted); + Predicate isUnrestrictedPredicate = criteriaBuilder.isFalse(isRestricted); + return switch (accessStatus) { + case EmbargoedThenRestricted -> criteriaBuilder.and(activelyEmbargoedPredicate, isRestrictedPredicate); + case EmbargoedThenPublic -> criteriaBuilder.and(activelyEmbargoedPredicate, isUnrestrictedPredicate); + case Restricted -> criteriaBuilder.and(inactivelyEmbargoedPredicate, isRestrictedPredicate); + case Public -> criteriaBuilder.and(inactivelyEmbargoedPredicate, isUnrestrictedPredicate); + }; + } + + private Predicate createSearchCriteriaPredicate(DatasetVersion datasetVersion, + FileSearchCriteria searchCriteria, + CriteriaBuilder criteriaBuilder, + CriteriaQuery criteriaQuery, + Root fileMetadataRoot) { + List predicates = new ArrayList<>(); + Predicate basePredicate = criteriaBuilder.equal(fileMetadataRoot.get("datasetVersion").get("id"), datasetVersion.getId()); + predicates.add(basePredicate); + String contentType = searchCriteria.getContentType(); + if (contentType != null) { + predicates.add(criteriaBuilder.equal(fileMetadataRoot.get("dataFile").get("contentType"), contentType)); + } + FileAccessStatus accessStatus = searchCriteria.getAccessStatus(); + if (accessStatus != null) { + predicates.add(createSearchCriteriaAccessStatusPredicate(accessStatus, criteriaBuilder, fileMetadataRoot)); + } + String categoryName = searchCriteria.getCategoryName(); + if (categoryName != null) { + Root dataFileCategoryRoot = 
criteriaQuery.from(DataFileCategory.class); + predicates.add(criteriaBuilder.equal(dataFileCategoryRoot.get("name"), categoryName)); + predicates.add(dataFileCategoryRoot.in(fileMetadataRoot.get("fileCategories"))); + } + String tabularTagName = searchCriteria.getTabularTagName(); + if (tabularTagName != null) { + Root dataFileTagRoot = criteriaQuery.from(DataFileTag.class); + predicates.add(criteriaBuilder.equal(dataFileTagRoot.get("type"), TagLabelToTypes.get(tabularTagName))); + predicates.add(dataFileTagRoot.in(fileMetadataRoot.get("dataFile").get("dataFileTags"))); + } + String searchText = searchCriteria.getSearchText(); + if (searchText != null && !searchText.isEmpty()) { + searchText = searchText.trim().toLowerCase(); + predicates.add(criteriaBuilder.like(fileMetadataRoot.get("label"), "%" + searchText + "%")); + } + return criteriaBuilder.and(predicates.toArray(new Predicate[]{})); + } + + private List createGetFileMetadatasOrder(CriteriaBuilder criteriaBuilder, + FileOrderCriteria orderCriteria, + Root fileMetadataRoot) { + Path label = fileMetadataRoot.get("label"); + Path dataFile = fileMetadataRoot.get("dataFile"); + Path publicationDate = dataFile.get("publicationDate"); + Path createDate = dataFile.get("createDate"); + Expression orderByLifetimeExpression = criteriaBuilder.selectCase().when(publicationDate.isNotNull(), publicationDate).otherwise(createDate); + List orderList = new ArrayList<>(); + switch (orderCriteria) { + case NameZA -> orderList.add(criteriaBuilder.desc(label)); + case Newest -> orderList.add(criteriaBuilder.desc(orderByLifetimeExpression)); + case Oldest -> orderList.add(criteriaBuilder.asc(orderByLifetimeExpression)); + case Size -> orderList.add(criteriaBuilder.asc(dataFile.get("filesize"))); + case Type -> { + orderList.add(criteriaBuilder.asc(dataFile.get("contentType"))); + orderList.add(criteriaBuilder.asc(label)); + } + default -> orderList.add(criteriaBuilder.asc(label)); + } + return orderList; + } + + private long getOriginalTabularFilesSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(Long.class); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + Root dataTableRoot = criteriaQuery.from(DataTable.class); + criteriaQuery + .select(criteriaBuilder.sum(dataTableRoot.get("originalFileSize"))) + .where(criteriaBuilder.and( + criteriaBuilder.equal(dataTableRoot.get("dataFile"), fileMetadataRoot.get("dataFile")), + createSearchCriteriaPredicate(datasetVersion, searchCriteria, criteriaBuilder, criteriaQuery, fileMetadataRoot))); + Long result = em.createQuery(criteriaQuery).getSingleResult(); + return (result == null) ? 
0 : result; + } + + private long getArchivalFilesSize(DatasetVersion datasetVersion, boolean ignoreTabular, FileSearchCriteria searchCriteria) { + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(Long.class); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + Predicate searchCriteriaPredicate = createSearchCriteriaPredicate(datasetVersion, searchCriteria, criteriaBuilder, criteriaQuery, fileMetadataRoot); + Predicate wherePredicate; + if (ignoreTabular) { + wherePredicate = criteriaBuilder.and(searchCriteriaPredicate, criteriaBuilder.isEmpty(fileMetadataRoot.get("dataFile").get("dataTables"))); + } else { + wherePredicate = searchCriteriaPredicate; + } + criteriaQuery + .select(criteriaBuilder.sum(fileMetadataRoot.get("dataFile").get("filesize"))) + .where(wherePredicate); + Long result = em.createQuery(criteriaQuery).getSingleResult(); + return (result == null) ? 0 : result; + } + + private Map getStringLongMapResultFromQuery(CriteriaQuery criteriaQuery) { + List categoryNameOccurrences = em.createQuery(criteriaQuery).getResultList(); + Map result = new HashMap<>(); + for (Tuple occurrence : categoryNameOccurrences) { + result.put(occurrence.get(0, String.class), occurrence.get(1, Long.class)); + } + return result; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 28243c37eee..1ee517c9831 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -48,7 +48,7 @@ public class DatasetVersionServiceBean implements java.io.Serializable { private static final Logger logger = Logger.getLogger(DatasetVersionServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); - + @EJB DatasetServiceBean datasetService; @@ -149,7 +149,7 @@ public DatasetVersion getDatasetVersion(){ return this.datasetVersionForResponse; } } // end RetrieveDatasetVersionResponse - + public DatasetVersion find(Object pk) { return em.find(DatasetVersion.class, pk); } @@ -166,9 +166,44 @@ public DatasetVersion findDeep(Object pk) { .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.datasetVersion") .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.releaseUser") .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.creator") + .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.dataFileTags") .getSingleResult(); } - + + /** + * Performs the same database lookup as the one behind Dataset.getVersions(). + * Additionally, provides the arguments for selecting a partial list of + * (length-offset) versions for pagination, plus the ability to pre-select + * only the publicly-viewable versions. + * It is recommended that individual software components utilize the + * ListVersionsCommand, instead of calling this service method directly. + * @param datasetId + * @param offset for pagination through long lists of versions + * @param length for pagination through long lists of versions + * @param includeUnpublished retrieves all the versions, including drafts and deaccessioned. 
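As a usage note (not part of the patch): a minimal sketch of how another bean might call the new DatasetVersionFilesServiceBean. The methods and nested enums are taken from the class above; the FileSearchCriteria constructor arguments and the enclosing bean's logger are assumptions.

@EJB
DatasetVersionFilesServiceBean datasetVersionFilesService;

public void summarizeRestrictedTabularFiles(DatasetVersion version) {
    // Assumed constructor order: contentType, accessStatus, categoryName, tabularTagName, searchText
    FileSearchCriteria criteria = new FileSearchCriteria(
            "text/tab-separated-values", FileSearchCriteria.FileAccessStatus.Restricted, null, null, null);

    // Count of matching file metadatas in this version
    long matching = datasetVersionFilesService.getFileMetadataCount(version, criteria);

    // First page of 10 matches, newest first
    List<FileMetadata> page = datasetVersionFilesService.getFileMetadatas(
            version, 10, 0, criteria, DatasetVersionFilesServiceBean.FileOrderCriteria.Newest);

    // Total download size, using the original (pre-ingest) size for tabular files
    long bytes = datasetVersionFilesService.getFilesDownloadSize(
            version, criteria, DatasetVersionFilesServiceBean.FileDownloadSizeMode.Original);

    logger.fine(matching + " restricted tabular files; first page has " + page.size()
            + " entries; estimated download size " + bytes + " bytes");
}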
+ * @return (partial) list of versions + */ + public List findVersions(Long datasetId, Integer offset, Integer length, boolean includeUnpublished) { + TypedQuery query; + if (includeUnpublished) { + query = em.createNamedQuery("DatasetVersion.findByDataset", DatasetVersion.class); + } else { + query = em.createNamedQuery("DatasetVersion.findReleasedByDataset", DatasetVersion.class) + .setParameter("datasetId", datasetId); + } + + query.setParameter("datasetId", datasetId); + + if (offset != null) { + query.setFirstResult(offset); + } + if (length != null) { + query.setMaxResults(length); + } + + return query.getResultList(); + } + public DatasetVersion findByFriendlyVersionNumber(Long datasetId, String friendlyVersionNumber) { Long majorVersionNumber = null; Long minorVersionNumber = null; @@ -460,10 +495,24 @@ private DatasetVersion getDatasetVersionByQuery(String queryString){ } } // end getDatasetVersionByQuery - - - - public DatasetVersion retrieveDatasetVersionByIdentiferClause(String identifierClause, String version){ + /** + * @deprecated because of a typo; use {@link #retrieveDatasetVersionByIdentifierClause(String, String) retrieveDatasetVersionByIdentifierClause} instead + * @see #retrieveDatasetVersionByIdentifierClause(String, String) + * @param identifierClause + * @param version + * @return a DatasetVersion if found, or {@code null} otherwise + */ + @Deprecated + public DatasetVersion retrieveDatasetVersionByIdentiferClause(String identifierClause, String version) { + return retrieveDatasetVersionByIdentifierClause(identifierClause, version); + } + + /** + * @param identifierClause + * @param version + * @return a DatasetVersion if found, or {@code null} otherwise + */ + public DatasetVersion retrieveDatasetVersionByIdentifierClause(String identifierClause, String version) { if (identifierClause == null){ return null; @@ -585,7 +634,7 @@ public RetrieveDatasetVersionResponse retrieveDatasetVersionByPersistentId(Strin identifierClause += " AND ds.identifier = '" + parsedId.getIdentifier() + "'"; - DatasetVersion ds = retrieveDatasetVersionByIdentiferClause(identifierClause, version); + DatasetVersion ds = retrieveDatasetVersionByIdentifierClause(identifierClause, version); if (ds != null){ msg("retrieved dataset: " + ds.getId() + " semantic: " + ds.getSemanticVersion()); @@ -683,7 +732,7 @@ public DatasetVersion getDatasetVersionById(Long datasetId, String version){ String identifierClause = this.getIdClause(datasetId); - DatasetVersion ds = retrieveDatasetVersionByIdentiferClause(identifierClause, version); + DatasetVersion ds = retrieveDatasetVersionByIdentifierClause(identifierClause, version); return ds; @@ -776,7 +825,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - // + "AND o.previewImageAvailable = false " + + "AND o.previewimagefail = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype LIKE 'image/%' " @@ -810,7 +859,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - // + "AND o.previewImageAvailable = false " + + "AND o.previewimagefail = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype = 'application/pdf' " diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index 682c1dc6744..c1de9d63410 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch; +import edu.harvard.iq.dataverse.storageuse.StorageUse; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -103,7 +104,11 @@ public enum DataverseType { * dataverses. */ protected boolean permissionRoot; - + + public Dataverse() { + StorageUse storageUse = new StorageUse(this); + this.setStorageUse(storageUse); + } public DataverseType getDataverseType() { return dataverseType; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseContact.java b/src/main/java/edu/harvard/iq/dataverse/DataverseContact.java index d77767985eb..9f86a03639a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseContact.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseContact.java @@ -99,7 +99,7 @@ public int hashCode() { @Override public boolean equals(Object object) { - if (!(object instanceof DatasetFieldType)) { + if (!(object instanceof DataverseContact)) { return false; } DataverseContact other = (DataverseContact) object; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFacet.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFacet.java index 768c2308e50..83a2d8fdb8f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFacet.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFacet.java @@ -93,7 +93,7 @@ public int hashCode() { @Override public boolean equals(Object object) { - if (!(object instanceof DatasetFieldType)) { + if (!(object instanceof DataverseFacet)) { return false; } DataverseFacet other = (DataverseFacet) object; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFeaturedDataverse.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFeaturedDataverse.java index 39ad6ca9520..d30d94cd034 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFeaturedDataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFeaturedDataverse.java @@ -85,7 +85,7 @@ public int hashCode() { @Override public boolean equals(Object object) { - if (!(object instanceof DatasetFieldType)) { + if (!(object instanceof DataverseFeaturedDataverse)) { return false; } DataverseFeaturedDataverse other = (DataverseFeaturedDataverse) object; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java index c4749be0cb3..a3425987bf8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java @@ -30,8 +30,9 @@ @NamedQuery(name = "DataverseFieldTypeInputLevel.findByDataverseIdDatasetFieldTypeId", query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id = :datasetFieldTypeId"), @NamedQuery(name = "DataverseFieldTypeInputLevel.findByDataverseIdAndDatasetFieldTypeIdList", - query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id in :datasetFieldIdList") - + query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id in :datasetFieldIdList"), + @NamedQuery(name = 
"DataverseFieldTypeInputLevel.findRequiredByDataverseId", + query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.required = 'true' ") }) @Table(name="DataverseFieldTypeInputLevel" , uniqueConstraints={ diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java index 66c700f59ce..1bd290ecc4d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java @@ -88,6 +88,16 @@ public DataverseFieldTypeInputLevel findByDataverseIdDatasetFieldTypeId(Long dat return null; } } + + public List findRequiredByDataverseId(Long dataverseId) { + Query query = em.createNamedQuery("DataverseFieldTypeInputLevel.findRequiredByDataverseId", DataverseFieldTypeInputLevel.class); + query.setParameter("dataverseId", dataverseId); + try{ + return query.getResultList(); + } catch ( NoResultException nre ) { + return null; + } + } public void delete(DataverseFieldTypeInputLevel dataverseFieldTypeInputLevel) { em.remove(em.merge(dataverseFieldTypeInputLevel)); diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index daf33f444d9..10dfa4a0e4f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -15,6 +15,9 @@ import edu.harvard.iq.dataverse.engine.command.impl.LinkDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.PublishDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseCommand; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.search.FacetCategory; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.search.SearchFields; @@ -34,9 +37,12 @@ import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; import jakarta.inject.Named; + +import java.util.AbstractMap; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -109,7 +115,9 @@ public enum LinkMode { @EJB DataverseLinkingServiceBean linkingService; @Inject PermissionsWrapper permissionsWrapper; - @Inject DataverseHeaderFragment dataverseHeaderFragment; + @Inject DataverseHeaderFragment dataverseHeaderFragment; + @EJB + PidProviderFactoryBean pidProviderFactoryBean; private Dataverse dataverse = new Dataverse(); @@ -362,7 +370,7 @@ public void initFeaturedDataverses() { List featuredSource = new ArrayList<>(); List featuredTarget = new ArrayList<>(); featuredSource.addAll(dataverseService.findAllPublishedByOwnerId(dataverse.getId())); - featuredSource.addAll(linkingService.findLinkingDataverses(dataverse.getId())); + featuredSource.addAll(linkingService.findLinkedDataverses(dataverse.getId())); List featuredList = featuredDataverseService.findByDataverseId(dataverse.getId()); for (DataverseFeaturedDataverse dfd : featuredList) { Dataverse fd = dfd.getFeaturedDataverse(); @@ -1286,4 +1294,28 @@ public String getCurationLabelSetNameLabel() { return setName; } + public Set> getGuestbookEntryOptions() { + return 
settingsWrapper.getGuestbookEntryOptions(this.dataverse).entrySet(); + } + + public Set> getPidProviderOptions() { + PidProvider defaultPidProvider = pidProviderFactoryBean.getDefaultPidGenerator(); + Set providerIds = PidUtil.getManagedProviderIds(); + Set> options = new HashSet>(); + if (providerIds.size() > 1) { + String label = defaultPidProvider.getLabel() + BundleUtil.getStringFromBundle("dataverse.default") + ": " + + defaultPidProvider.getProtocol() + ":" + defaultPidProvider.getAuthority() + + defaultPidProvider.getSeparator() + defaultPidProvider.getShoulder(); + Entry option = new AbstractMap.SimpleEntry("default", label); + options.add(option); + } + for (String providerId : providerIds) { + PidProvider pidProvider = PidUtil.getPidProvider(providerId); + String label = pidProvider.getLabel() + ": " + pidProvider.getProtocol() + ":" + pidProvider.getAuthority() + + pidProvider.getSeparator() + pidProvider.getShoulder(); + Entry option = new AbstractMap.SimpleEntry(providerId, label); + options.add(option); + } + return options; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 7194a1ef31e..10b5d800c21 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -18,8 +18,11 @@ import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.SolrSearchResult; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.File; import java.io.IOException; import java.sql.Timestamp; @@ -42,7 +45,15 @@ import jakarta.persistence.NonUniqueResultException; import jakarta.persistence.PersistenceContext; import jakarta.persistence.TypedQuery; +import java.nio.file.Files; +import java.nio.file.Paths; +import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrServerException; +import org.everit.json.schema.Schema; +import org.everit.json.schema.ValidationException; +import org.everit.json.schema.loader.SchemaLoader; +import org.json.JSONObject; +import org.json.JSONTokener; /** * @@ -80,6 +91,9 @@ public class DataverseServiceBean implements java.io.Serializable { @EJB PermissionServiceBean permissionService; + @EJB + DataverseFieldTypeInputLevelServiceBean dataverseFieldTypeInputLevelService; + @EJB SystemConfig systemConfig; @@ -346,51 +360,6 @@ public String getDataverseLogoThumbnailAsBase64ById(Long dvId) { } return null; } - - /* - public boolean isDataverseLogoThumbnailAvailable(Dataverse dataverse, User user) { - if (dataverse == null) { - return false; - } - - // First, check if the dataverse has a defined logo: - - //if (dataverse.getDataverseTheme() != null && dataverse.getDataverseTheme().getLogo() != null && !dataverse.getDataverseTheme().getLogo().equals("")) { - File dataverseLogoFile = getLogo(dataverse); - if (dataverseLogoFile != null) { - String logoThumbNailPath = null; - - if (dataverseLogoFile.exists()) { - logoThumbNailPath = ImageThumbConverter.generateImageThumbnailFromFile(dataverseLogoFile.getAbsolutePath(), 48); - if (logoThumbNailPath != null) { - return true; - } - } - } - //} - */ - // If there's no uploaded logo for this dataverse, go 
through its - // [released] datasets and see if any of them have card images: - // - // TODO: - // Discuss/Decide if we really want to do this - i.e., go through every - // file in every dataset below... - // -- L.A. 4.0 beta14 - /* - for (Dataset dataset : datasetService.findPublishedByOwnerId(dataverse.getId())) { - if (dataset != null) { - DatasetVersion releasedVersion = dataset.getReleasedVersion(); - - if (releasedVersion != null) { - if (datasetService.isDatasetCardImageAvailable(releasedVersion, user)) { - return true; - } - } - } - } */ - /* - return false; - } */ private File getLogo(Dataverse dataverse) { if (dataverse.getId() == null) { @@ -399,16 +368,7 @@ private File getLogo(Dataverse dataverse) { DataverseTheme theme = dataverse.getDataverseTheme(); if (theme != null && theme.getLogo() != null && !theme.getLogo().isEmpty()) { - Properties p = System.getProperties(); - String domainRoot = p.getProperty("com.sun.aas.instanceRoot"); - - if (domainRoot != null && !"".equals(domainRoot)) { - return new File (domainRoot + File.separator + - "docroot" + File.separator + - "logos" + File.separator + - dataverse.getLogoOwnerId() + File.separator + - theme.getLogo()); - } + return ThemeWidgetFragment.getLogoDir(dataverse.getLogoOwnerId()).resolve(theme.getLogo()).toFile(); } return null; @@ -928,5 +888,294 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) { return em.createNativeQuery(cqString).getResultList(); } + + public String getCollectionDatasetSchema(String dataverseAlias) { + + Dataverse testDV = this.findByAlias(dataverseAlias); + + while (!testDV.isMetadataBlockRoot()) { + if (testDV.getOwner() == null) { + break; // we are at the root; which by defintion is metadata blcok root, regarldess of the value + } + testDV = testDV.getOwner(); + } + + /* Couldn't get the 'return base if no extra required fields to work with the path provided + leaving it as 'out of scope' for now SEK 11/27/2023 + + List required = new ArrayList<>(); + + required = dataverseFieldTypeInputLevelService.findRequiredByDataverseId(testDV.getId()); + + if (required == null || required.isEmpty()){ + String pathToJsonFile = "src/main/resources/edu/harvas/iq/dataverse/baseDatasetSchema.json"; + String baseSchema = getBaseSchemaStringFromFile(pathToJsonFile); + if (baseSchema != null && !baseSchema.isEmpty()){ + return baseSchema; + } + } + + */ + List selectedBlocks = new ArrayList<>(); + List requiredDSFT = new ArrayList<>(); + + selectedBlocks.addAll(testDV.getMetadataBlocks()); + + for (MetadataBlock mdb : selectedBlocks) { + for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) { + if (!dsft.isChild()) { + DataverseFieldTypeInputLevel dsfIl = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), dsft.getId()); + if (dsfIl != null) { + dsft.setRequiredDV(dsfIl.isRequired()); + dsft.setInclude(dsfIl.isInclude()); + } else { + dsft.setRequiredDV(dsft.isRequired()); + dsft.setInclude(true); + } + if (dsft.isHasChildren()) { + for (DatasetFieldType child : dsft.getChildDatasetFieldTypes()) { + DataverseFieldTypeInputLevel dsfIlChild = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), child.getId()); + if (dsfIlChild != null) { + child.setRequiredDV(dsfIlChild.isRequired()); + child.setInclude(dsfIlChild.isInclude()); + } else { + // in the case of conditionally required (child = true, parent = false) + // we set this to false; i.e this is the default "don't override" value + child.setRequiredDV(child.isRequired() && 
dsft.isRequired()); + child.setInclude(true); + } + } + } + if(dsft.isRequiredDV()){ + requiredDSFT.add(dsft); + } + } + } + + } + + String reqMDBNames = ""; + List hasReqFields = new ArrayList<>(); + String retval = datasetSchemaPreface; + for (MetadataBlock mdb : selectedBlocks) { + for (DatasetFieldType dsft : requiredDSFT) { + if (dsft.getMetadataBlock().equals(mdb)) { + hasReqFields.add(mdb); + if (!reqMDBNames.isEmpty()) reqMDBNames += ","; + reqMDBNames += "\"" + mdb.getName() + "\""; + break; + } + } + } + int countMDB = 0; + for (MetadataBlock mdb : hasReqFields) { + if (countMDB>0){ + retval += ","; + } + retval += getCustomMDBSchema(mdb, requiredDSFT); + countMDB++; + } + + retval += "\n }"; + + retval += endOfjson.replace("blockNames", reqMDBNames); + + return retval; + + } + + private String getCustomMDBSchema (MetadataBlock mdb, List requiredDSFT){ + String retval = ""; + boolean mdbHasReqField = false; + int numReq = 0; + List requiredThisMDB = new ArrayList<>(); + + for (DatasetFieldType dsft : requiredDSFT ){ + + if(dsft.getMetadataBlock().equals(mdb)){ + numReq++; + mdbHasReqField = true; + requiredThisMDB.add(dsft); + } + } + if (mdbHasReqField){ + retval += startOfMDB.replace("blockName", mdb.getName()); + + retval += minItemsTemplate.replace("numMinItems", Integer.toString(requiredThisMDB.size())); + int count = 0; + for (DatasetFieldType dsft:requiredThisMDB ){ + count++; + String reqValImp = reqValTemplate.replace("reqFieldTypeName", dsft.getName()); + if (count < requiredThisMDB.size()){ + retval += reqValImp + "\n"; + } else { + reqValImp = StringUtils.substring(reqValImp, 0, reqValImp.length() - 1); + retval += reqValImp+ "\n"; + retval += endOfReqVal; + } + } + + } + + return retval; + } + + public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { + JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias))); + + try { + Schema schema = SchemaLoader.load(rawSchema); + schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid + } catch (ValidationException vx) { + logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage()); + String accumulatedexceptions = ""; + for (ValidationException va : vx.getCausingExceptions()){ + accumulatedexceptions = accumulatedexceptions + va; + accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " "); + } + if (!accumulatedexceptions.isEmpty()){ + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; + } else { + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); + } + + } catch (Exception ex) { + logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage()); + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); + } + + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.succeeded"); + } + + static String getBaseSchemaStringFromFile(String pathToJsonFile) { + File datasetSchemaJson = new File(pathToJsonFile); + try { + String datasetSchemaAsJson = new String(Files.readAllBytes(Paths.get(datasetSchemaJson.getAbsolutePath()))); + return datasetSchemaAsJson; + } catch (IOException ex) { + logger.info("IO - failed to get schema file - will build on fly " +ex.getMessage()); + return null; + } catch (Exception 
e){ + logger.info("Other exception - failed to get schema file - will build on fly. " + e.getMessage()); + return null; + } + } + private String datasetSchemaPreface = + "{\n" + + " \"$schema\": \"http://json-schema.org/draft-04/schema#\",\n" + + " \"$defs\": {\n" + + " \"field\": {\n" + + " \"type\": \"object\",\n" + + " \"required\": [\"typeClass\", \"multiple\", \"typeName\"],\n" + + " \"properties\": {\n" + + " \"value\": {\n" + + " \"anyOf\": [\n" + + " {\n" + + " \"type\": \"array\"\n" + + " },\n" + + " {\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"$ref\": \"#/$defs/field\"\n" + + " }\n" + + " ]\n" + + " },\n" + + " \"typeClass\": {\n" + + " \"type\": \"string\"\n" + + " },\n" + + " \"multiple\": {\n" + + " \"type\": \"boolean\"\n" + + " },\n" + + " \"typeName\": {\n" + + " \"type\": \"string\"\n" + + " }\n" + + " }\n" + + " }\n" + + "},\n" + + "\"type\": \"object\",\n" + + "\"properties\": {\n" + + " \"datasetVersion\": {\n" + + " \"type\": \"object\",\n" + + " \"properties\": {\n" + + " \"license\": {\n" + + " \"type\": \"object\",\n" + + " \"properties\": {\n" + + " \"name\": {\n" + + " \"type\": \"string\"\n" + + " },\n" + + " \"uri\": {\n" + + " \"type\": \"string\",\n" + + " \"format\": \"uri\"\n" + + " }\n" + + " },\n" + + " \"required\": [\"name\", \"uri\"]\n" + + " },\n" + + " \"metadataBlocks\": {\n" + + " \"type\": \"object\",\n" + + " \"properties\": {\n" + + "" ; + + private String startOfMDB = "" + +" \"blockName\": {\n" + +" \"type\": \"object\",\n" + +" \"properties\": {\n" + +" \"fields\": {\n" + +" \"type\": \"array\",\n" + +" \"items\": {\n" + +" \"$ref\": \"#/$defs/field\"\n" + +" },"; + + private String reqValTemplate = " {\n" + +" \"contains\": {\n" + +" \"properties\": {\n" + +" \"typeName\": {\n" + +" \"const\": \"reqFieldTypeName\"\n" + +" }\n" + +" }\n" + +" }\n" + +" },"; + + private String minItemsTemplate = "\n \"minItems\": numMinItems,\n" + +" \"allOf\": [\n"; + private String endOfReqVal = " ]\n" + +" }\n" + +" },\n" + +" \"required\": [\"fields\"]\n" + +" }"; + + private String endOfjson = ",\n" + +" \"required\": [blockNames]\n" + +" }\n" + +" },\n" + +" \"required\": [\"metadataBlocks\"]\n" + +" }\n" + +" },\n" + +" \"required\": [\"datasetVersion\"]\n" + +"}\n"; + + public void saveStorageQuota(Dataverse target, Long allocation) { + StorageQuota storageQuota = target.getStorageQuota(); + + if (storageQuota != null) { + storageQuota.setAllocation(allocation); + em.merge(storageQuota); + } else { + storageQuota = new StorageQuota(); + storageQuota.setDefinitionPoint(target); + storageQuota.setAllocation(allocation); + target.setStorageQuota(storageQuota); + em.persist(storageQuota); + } + em.flush(); + } + + public void disableStorageQuota(StorageQuota storageQuota) { + if (storageQuota != null && storageQuota.getAllocation() != null) { + storageQuota.setAllocation(null); + em.merge(storageQuota); + em.flush(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseTheme.java b/src/main/java/edu/harvard/iq/dataverse/DataverseTheme.java index 539669328a7..7f57d16b95a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseTheme.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseTheme.java @@ -181,7 +181,7 @@ public int hashCode() { @Override public boolean equals(Object object) { - if (!(object instanceof DatasetFieldType)) { + if (!(object instanceof DataverseTheme)) { return false; } DataverseTheme other = (DataverseTheme) object; diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java 
b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 9e7f3f3fe96..cc5d7620969 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; import java.sql.Timestamp; import java.text.SimpleDateFormat; @@ -155,7 +156,7 @@ public String visit(DataFile df) { private String identifier; private boolean identifierRegistered; - + private transient GlobalId globalId = null; @OneToMany(mappedBy = "dvObject", cascade = CascadeType.ALL, orphanRemoval = true) @@ -177,6 +178,9 @@ public void setAlternativePersistentIndentifiers(Set roleAssignments; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java index a322a25103e..c991c4c02d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java @@ -1,8 +1,20 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.storageuse.StorageUse; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; +import jakarta.persistence.CascadeType; +import java.util.Optional; import jakarta.persistence.MappedSuperclass; +import jakarta.persistence.OneToOne; +import jakarta.persistence.Transient; + import org.apache.commons.lang3.StringUtils; /** @@ -12,10 +24,8 @@ */ @MappedSuperclass public abstract class DvObjectContainer extends DvObject { - - - public static final String UNDEFINED_METADATA_LANGUAGE_CODE = "undefined"; //Used in dataverse.xhtml as a non-null selection option value (indicating inheriting the default) + public static final String UNDEFINED_CODE = "undefined"; //Used in dataverse.xhtml as a non-null selection option value (indicating inheriting the default) public void setOwner(Dataverse owner) { super.setOwner(owner); @@ -37,6 +47,16 @@ public boolean isEffectivelyPermissionRoot() { private String metadataLanguage=null; + private Boolean guestbookAtRequest = null; + + private String pidGeneratorSpecs = null; + + @Transient + private PidProvider pidGenerator = null; + + @OneToOne(mappedBy = "dvObjectContainer",cascade={ CascadeType.REMOVE, CascadeType.PERSIST}, orphanRemoval=true) + private StorageUse storageUse; + public String getEffectiveStorageDriverId() { String id = storageDriver; if (StringUtils.isBlank(id)) { @@ -70,7 +90,7 @@ public String getEffectiveMetadataLanguage() { if (this.getOwner() != null) { ml = this.getOwner().getEffectiveMetadataLanguage(); } else { - ml = UNDEFINED_METADATA_LANGUAGE_CODE; + ml = UNDEFINED_CODE; } } return ml; @@ -78,13 +98,13 @@ public String getEffectiveMetadataLanguage() { public String getMetadataLanguage() { if (metadataLanguage == null) { - return UNDEFINED_METADATA_LANGUAGE_CODE; + return UNDEFINED_CODE; } return metadataLanguage; } public void setMetadataLanguage(String ml) { - if (ml != null && ml.equals(UNDEFINED_METADATA_LANGUAGE_CODE)) { + if (ml != null && ml.equals(UNDEFINED_CODE)) { this.metadataLanguage = null; } 
else { this.metadataLanguage = ml; @@ -92,7 +112,40 @@ public void setMetadataLanguage(String ml) { } public static boolean isMetadataLanguageSet(String mdLang) { - return mdLang!=null && !mdLang.equals(UNDEFINED_METADATA_LANGUAGE_CODE); + return mdLang!=null && !mdLang.equals(UNDEFINED_CODE); + } + + public boolean getEffectiveGuestbookEntryAtRequest() { + boolean gbAtRequest = false; + if (guestbookAtRequest==null) { + if (this.getOwner() != null) { + gbAtRequest = this.getOwner().getEffectiveGuestbookEntryAtRequest(); + } else { + Optional opt = JvmSettings.GUESTBOOK_AT_REQUEST.lookupOptional(Boolean.class); + if (opt.isPresent()) { + gbAtRequest = opt.get(); + } + } + } else { + gbAtRequest = guestbookAtRequest; + } + return gbAtRequest; + } + + public String getGuestbookEntryAtRequest() { + if(guestbookAtRequest==null) { + return UNDEFINED_CODE; + } + return Boolean.valueOf(guestbookAtRequest).toString(); + } + + public void setGuestbookEntryAtRequest(String gbAtRequest) { + if (gbAtRequest == null || gbAtRequest.equals(UNDEFINED_CODE)) { + this.guestbookAtRequest = null; + } else { + //Force to true or false + this.guestbookAtRequest = Boolean.valueOf(Boolean.parseBoolean(gbAtRequest)); + } } @@ -123,5 +176,85 @@ public String getCurationLabelSetName() { public void setCurationLabelSetName(String setName) { this.externalLabelSetName = setName; } + + /** + * Should only be used in constructors for DvObjectContainers (Datasets and + * Collections), to make sure new entries are created and persisted in the + * database StorageUse table for every DvObject container we create. + * @param storageUse + */ + public void setStorageUse(StorageUse storageUse) { + this.storageUse = storageUse; + } + + + /* Dataverse collections and dataset can be configured to use different PidProviders as PID generators for contained objects (datasets or data files). + * This mechanism is similar to others except that the stored value is a JSON object defining the protocol, authority, shoulder, and, optionally, the separator for the PidProvider. 
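+ * For illustration only (hypothetical values, not part of this change), a stored pidGeneratorSpecs value could look like
+ * {"protocol": "doi", "authority": "10.5072", "shoulder": "FK2/", "separator": "/"}.
+ * The "separator" key is optional; when it is absent, getEffectivePidGenerator() below falls back to the three-argument
+ * PidUtil.getPidProvider(protocol, authority, shoulder) lookup.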
+ */ + + public String getPidGeneratorSpecs() { + return pidGeneratorSpecs; + } + + public void setPidGeneratorSpecs(String pidGeneratorSpecs) { + this.pidGeneratorSpecs = pidGeneratorSpecs; + } + + // Used in JSF when selecting the PidGenerator + public String getPidGeneratorId() { + PidProvider pidGenerator = getEffectivePidGenerator(); + if (pidGenerator == null) { + return "default"; + } else { + return getEffectivePidGenerator().getId(); + } + } + + //Used in JSF when setting the PidGenerator + public void setPidGeneratorId(String pidGeneratorId) { + // Note that the "default" provider will not be found so will result in + // setPidGenerator(null), which unsets the pidGenerator/Specs as desired + setPidGenerator(PidUtil.getPidProvider(pidGeneratorId)); + } + + public void setPidGenerator(PidProvider pidGenerator) { + this.pidGenerator = pidGenerator; + if (pidGenerator != null) { + JsonObjectBuilder job = jakarta.json.Json.createObjectBuilder(); + this.pidGeneratorSpecs = job.add("protocol", pidGenerator.getProtocol()) + .add("authority", pidGenerator.getAuthority()).add("shoulder", pidGenerator.getShoulder()) + .add("separator", pidGenerator.getSeparator()).build().toString(); + } else { + this.pidGeneratorSpecs = null; + } + } + + public PidProvider getEffectivePidGenerator() { + if (pidGenerator == null) { + String specs = getPidGeneratorSpecs(); + if (StringUtils.isBlank(specs)) { + GlobalId pid = getGlobalId(); + if ((pid != null) && PidUtil.getPidProvider(pid.getProviderId()).canCreatePidsLike(pid)) { + pidGenerator = PidUtil.getPidProvider(pid.getProviderId()); + } else { + if (getOwner() != null) { + pidGenerator = getOwner().getEffectivePidGenerator(); + } + } + } else { + JsonObject providerSpecs = JsonUtil.getJsonObject(specs); + if (providerSpecs.containsKey("separator")) { + pidGenerator = PidUtil.getPidProvider(providerSpecs.getString("protocol"), + providerSpecs.getString("authority"), providerSpecs.getString("shoulder"), + providerSpecs.getString("separator")); + } else { + pidGenerator = PidUtil.getPidProvider(providerSpecs.getString("protocol"), + providerSpecs.getString("authority"), providerSpecs.getString("shoulder")); + } + } + setPidGenerator(pidGenerator); + } + return pidGenerator; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index d4219c36149..bd7fbeaff10 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -1,8 +1,9 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; import edu.harvard.iq.dataverse.pidproviders.PidUtil; - import java.sql.Timestamp; import java.util.ArrayList; import java.util.Date; @@ -12,6 +13,8 @@ import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; + +import jakarta.ejb.EJB; import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; import static jakarta.ejb.TransactionAttributeType.REQUIRES_NEW; @@ -38,6 +41,9 @@ public class DvObjectServiceBean implements java.io.Serializable { @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; + @EJB + PidProviderFactoryBean pidProviderFactoryBean; + private static final Logger logger = Logger.getLogger(DvObjectServiceBean.class.getCanonicalName()); /** * 
@param dvoc The object we check @@ -389,4 +395,19 @@ public String generateNewIdentifierByStoredProcedure() { return (String) query.getOutputParameterValue(1); } + /** @deprecated Backward-compatibility method to get the effective pid generator for a DvObjectContainer. + * If the dvObjectContainer method fails, this method will check for the old global default settings. + * If/when those are no longer supported, this method can be removed and replaced with calls directly + * to dvObjectContainer.getEffectivePidGenerator(); + * + */ + @Deprecated(forRemoval = true, since = "2024-02-09") + public PidProvider getEffectivePidGenerator(DvObjectContainer dvObjectContainer) { + PidProvider pidGenerator = dvObjectContainer.getEffectivePidGenerator(); + if (pidGenerator == null) { + pidGenerator = pidProviderFactoryBean.getDefaultPidGenerator(); + } + return pidGenerator; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 02a148f8cc5..993cb02b66b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -28,6 +28,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.RequestRsyncScriptCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDataFilesCommand; import edu.harvard.iq.dataverse.ingest.IngestRequest; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.ingest.IngestUtil; @@ -36,6 +37,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -186,7 +188,13 @@ public enum Referrer { // Used to store results of permissions checks private final Map datasetPermissionMap = new HashMap<>(); // { Permission human_name : Boolean } + // Size limit of an individual file: (set for the storage volume used) private Long maxFileUploadSizeInBytes = null; + // Total amount of data that the user should be allowed to upload. + // Will be calculated in real time based on various level quotas - + // for this user and/or this collection/dataset, etc. We should + // assume that it may change during the user session. + private Long maxTotalUploadSizeInBytes = null; private Long maxIngestSizeInBytes = null; // CSV: 4.8 MB, DTA: 976.6 KB, XLSX: 5.7 MB, etc. 
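+    // Illustrative sketch only (not part of this change): one way the quota fields above could be
+    // used to pre-check an incoming upload against the remaining session quota. The method name
+    // isWithinSessionQuota is hypothetical; it relies on the uploadSessionQuota field declared below
+    // and its getRemainingQuotaInBytes() accessor, which this page already uses elsewhere.
+    private boolean isWithinSessionQuota(long incomingFileSizeInBytes) {
+        if (uploadSessionQuota == null) {
+            return true; // storage quotas are not being enforced for this session
+        }
+        Long remaining = uploadSessionQuota.getRemainingQuotaInBytes();
+        return remaining == null || incomingFileSizeInBytes <= remaining;
+    }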
private String humanPerFormatTabularLimits = null; @@ -198,6 +206,7 @@ public enum Referrer { private final int NUMBER_OF_SCROLL_ROWS = 25; private DataFile singleFile = null; + private UploadSessionQuotaLimit uploadSessionQuota = null; public DataFile getSingleFile() { return singleFile; @@ -340,6 +349,18 @@ public boolean isUnlimitedUploadFileSize() { return this.maxFileUploadSizeInBytes == null; } + + public Long getMaxTotalUploadSizeInBytes() { + return maxTotalUploadSizeInBytes; + } + + public String getHumanMaxTotalUploadSizeInBytes() { + return FileSizeChecker.bytesToHumanReadable(maxTotalUploadSizeInBytes); + } + + public boolean isStorageQuotaEnforced() { + return uploadSessionQuota != null; + } public Long getMaxIngestSizeInBytes() { return maxIngestSizeInBytes; @@ -508,15 +529,28 @@ public String initCreateMode(String modeToken, DatasetVersion version, MutableBo selectedFiles = selectedFileMetadatasList; this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getEffectiveStorageDriverId()); + if (systemConfig.isStorageQuotasEnforced()) { + this.uploadSessionQuota = datafileService.getUploadSessionQuotaLimit(dataset); + if (this.uploadSessionQuota != null) { + this.maxTotalUploadSizeInBytes = uploadSessionQuota.getRemainingQuotaInBytes(); + } + } else { + this.maxTotalUploadSizeInBytes = null; + } this.maxIngestSizeInBytes = systemConfig.getTabularIngestSizeLimit(); this.humanPerFormatTabularLimits = populateHumanPerFormatTabularLimits(); this.multipleUploadFilesLimit = systemConfig.getMultipleUploadFilesLimit(); - + logger.fine("done"); saveEnabled = true; + return null; } + + public boolean isQuotaExceeded() { + return systemConfig.isStorageQuotasEnforced() && uploadSessionQuota != null && uploadSessionQuota.getRemainingQuotaInBytes() == 0; + } public String init() { // default mode should be EDIT @@ -559,10 +593,15 @@ public String init() { clone = workingVersion.cloneDatasetVersion(); this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getEffectiveStorageDriverId()); + if (systemConfig.isStorageQuotasEnforced()) { + this.uploadSessionQuota = datafileService.getUploadSessionQuotaLimit(dataset); + if (this.uploadSessionQuota != null) { + this.maxTotalUploadSizeInBytes = uploadSessionQuota.getRemainingQuotaInBytes(); + } + } this.maxIngestSizeInBytes = systemConfig.getTabularIngestSizeLimit(); this.humanPerFormatTabularLimits = populateHumanPerFormatTabularLimits(); this.multipleUploadFilesLimit = systemConfig.getMultipleUploadFilesLimit(); - this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getEffectiveStorageDriverId()); hasValidTermsOfAccess = isHasValidTermsOfAccess(); if (!hasValidTermsOfAccess) { @@ -656,7 +695,7 @@ public String init() { if (isHasPublicStore()){ JH.addMessage(FacesMessage.SEVERITY_WARN, getBundleString("dataset.message.label.fileAccess"), getBundleString("dataset.message.publicInstall")); } - + return null; } @@ -1063,7 +1102,7 @@ public String save() { } // Try to save the NEW files permanently: - List filesAdded = ingestService.saveAndAddFilesToDataset(workingVersion, newFiles, null, true); + List filesAdded = ingestService.saveAndAddFilesToDataset(workingVersion, newFiles, null, true); // reset the working list of fileMetadatas, as to only include the ones // that have been added to the version successfully: @@ -1198,9 +1237,6 @@ public String save() { - We decided not to bother obtaining persistent ids for new files as they are uploaded and created. 
The identifiers will be assigned later, when the version is published. - - logger.info("starting async job for obtaining persistent ids for files."); - datasetService.obtainPersistentIdentifiersForDatafiles(dataset); */ } @@ -1493,14 +1529,16 @@ public void handleDropBoxUpload(ActionEvent event) { // for example, multiple files can be extracted from an uncompressed // zip file. //datafiles = ingestService.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream"); - CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream", null, null, systemConfig); + //CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream", null, null, systemConfig); + Command cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, dropBoxStream, fileName, "application/octet-stream", null, uploadSessionQuota, null); + CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd); datafiles = createDataFilesResult.getDataFiles(); Optional.ofNullable(editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult)).ifPresent(errorMessage -> errorMessages.add(errorMessage)); - } catch (IOException ex) { + } catch (CommandException ex) { this.logger.log(Level.SEVERE, "Error during ingest of DropBox file {0} from link {1}", new Object[]{fileName, fileLink}); continue; - }/*catch (FileExceedsMaxSizeException ex){ + } /*catch (FileExceedsMaxSizeException ex){ this.logger.log(Level.SEVERE, "Error during ingest of DropBox file {0} from link {1}: {2}", new Object[]{fileName, fileLink, ex.getMessage()}); continue; }*/ finally { @@ -2023,7 +2061,21 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { // Note: A single uploaded file may produce multiple datafiles - // for example, multiple files can be extracted from an uncompressed // zip file. - CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, null, systemConfig); + ///CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, null, systemConfig); + + Command cmd; + if (mode == FileEditMode.CREATE) { + // This is a file upload in the context of creating a brand new + // dataset that does not yet exist in the database. 
We must + // use the version of the Create New Files constructor that takes + // the parent Dataverse as the extra argument: + cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, uploadSessionQuota, null, null, null, workingVersion.getDataset().getOwner()); + } else { + cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, uploadSessionQuota, null); + } + CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd); + + dFileList = createDataFilesResult.getDataFiles(); String createDataFilesError = editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult); if(createDataFilesError != null) { @@ -2032,8 +2084,14 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { } } catch (IOException ioex) { + // shouldn't we try and communicate to the user what happened? logger.warning("Failed to process and/or save the file " + uFile.getFileName() + "; " + ioex.getMessage()); return; + } catch (CommandException cex) { + // shouldn't we try and communicate to the user what happened? + errorMessages.add(cex.getMessage()); + uploadComponentId = event.getComponent().getClientId(); + return; } /*catch (FileExceedsMaxSizeException ex) { logger.warning("Failed to process and/or save the file " + uFile.getFileName() + "; " + ex.getMessage()); @@ -2111,6 +2169,11 @@ public void handleExternalUpload() { - Max size NOT specified in db: default is unlimited - Max size specified in db: check too make sure file is within limits // ---------------------------- */ + /** + * @todo: this file size limit check is now redundant here, since the new + * CreateNewFilesCommand is going to perform it (and the quota + * checks too, if enabled + */ if ((!this.isUnlimitedUploadFileSize()) && (fileSize > this.getMaxFileUploadSizeInBytes())) { String warningMessage = "Uploaded file \"" + fileName + "\" exceeded the limit of " + fileSize + " bytes and was not uploaded."; sio.delete(); @@ -2130,18 +2193,27 @@ public void handleExternalUpload() { List datafiles = new ArrayList<>(); // ----------------------------------------------------------- - // Send it through the ingest service + // Execute the CreateNewDataFiles command: // ----------------------------------------------------------- + + Dataverse parent = null; + + if (mode == FileEditMode.CREATE) { + // This is a file upload in the context of creating a brand new + // dataset that does not yet exist in the database. We must + // pass the parent Dataverse to the CreateNewFiles command + // constructor. The RequiredPermission on the command in this + // scenario = Permission.AddDataset on the parent dataverse. + parent = workingVersion.getDataset().getOwner(); + } + try { - - // Note: A single uploaded file may produce multiple datafiles - - // for example, multiple files can be extracted from an uncompressed - // zip file. 
- //datafiles = ingestService.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream"); - CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, null, fileName, contentType, fullStorageIdentifier, checksumValue, checksumType, systemConfig); + + Command cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, null, fileName, contentType, fullStorageIdentifier, uploadSessionQuota, checksumValue, checksumType, fileSize, parent); + CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd); datafiles = createDataFilesResult.getDataFiles(); Optional.ofNullable(editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult)).ifPresent(errorMessage -> errorMessages.add(errorMessage)); - } catch (IOException ex) { + } catch (CommandException ex) { logger.log(Level.SEVERE, "Error during ingest of file {0}", new Object[]{fileName}); } diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java index bad8903c091..3793b6eeeb4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java +++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java @@ -17,8 +17,7 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.pidproviders.FakePidProviderServiceBean; -import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.search.IndexBatchServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; @@ -31,6 +30,7 @@ import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.ConstraintViolationUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -48,7 +48,6 @@ import static jakarta.ejb.TransactionAttributeType.SUPPORTS; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; -import jakarta.validation.ConstraintViolation; import jakarta.validation.ConstraintViolationException; /** @@ -113,20 +112,8 @@ public class EjbDataverseEngine { DataverseFieldTypeInputLevelServiceBean fieldTypeInputLevels; @EJB - DOIEZIdServiceBean doiEZId; - - @EJB - DOIDataCiteServiceBean doiDataCite; - - @EJB - FakePidProviderServiceBean fakePidProvider; + PidProviderFactoryBean pidProviderFactory; - @EJB - HandlenetServiceBean handleNet; - - @EJB - PermaLinkPidProviderServiceBean permaLinkProvider; - @EJB SettingsServiceBean settings; @@ -184,6 +171,9 @@ public class EjbDataverseEngine { @EJB ConfirmEmailServiceBean confirmEmailService; + @EJB + StorageUseServiceBean storageUseService; + @EJB EjbDataverseEngineInner innerEngine; @@ -480,28 +470,8 @@ public DataverseFieldTypeInputLevelServiceBean fieldTypeInputLevels() { } @Override - public DOIEZIdServiceBean doiEZId() { - return doiEZId; - } - - @Override - public DOIDataCiteServiceBean doiDataCite() { - return doiDataCite; - } - - @Override - public 
FakePidProviderServiceBean fakePidProvider() { - return fakePidProvider; - } - - @Override - public HandlenetServiceBean handleNet() { - return handleNet; - } - - @Override - public PermaLinkPidProviderServiceBean permaLinkProvider() { - return permaLinkProvider; + public PidProviderFactoryBean pidProviderFactory() { + return pidProviderFactory; } @Override @@ -528,6 +498,12 @@ public DataverseLinkingServiceBean dvLinking() { public DatasetLinkingServiceBean dsLinking() { return dsLinking; } + + @Override + public StorageUseServiceBean storageUse() { + return storageUseService; + } + @Override public DataverseEngine engine() { return new DataverseEngine() { diff --git a/src/main/java/edu/harvard/iq/dataverse/FileAccessRequest.java b/src/main/java/edu/harvard/iq/dataverse/FileAccessRequest.java index 6f68815c2ca..51c67a37a09 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileAccessRequest.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileAccessRequest.java @@ -1,60 +1,178 @@ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import java.io.Serializable; +import java.util.Date; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import jakarta.persistence.Column; -import jakarta.persistence.Embeddable; -import jakarta.persistence.EmbeddedId; import jakarta.persistence.Entity; +import jakarta.persistence.EnumType; +import jakarta.persistence.Enumerated; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; -import jakarta.persistence.MapsId; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.OneToOne; import jakarta.persistence.Table; import jakarta.persistence.Temporal; import jakarta.persistence.TemporalType; -import java.io.Serializable; -import java.util.Date; + +/** + * + * @author Marina + */ @Entity @Table(name = "fileaccessrequests") -public class FileAccessRequest { - @EmbeddedId - private FileAccessRequestKey id; + +@NamedQueries({ + @NamedQuery(name = "FileAccessRequest.findByAuthenticatedUserId", + query = "SELECT far FROM FileAccessRequest far WHERE far.user.id=:authenticatedUserId"), + @NamedQuery(name = "FileAccessRequest.findByGuestbookResponseId", + query = "SELECT far FROM FileAccessRequest far WHERE far.guestbookResponse.id=:guestbookResponseId"), + @NamedQuery(name = "FileAccessRequest.findByDataFileId", + query = "SELECT far FROM FileAccessRequest far WHERE far.dataFile.id=:dataFileId"), + @NamedQuery(name = "FileAccessRequest.findByRequestState", + query = "SELECT far FROM FileAccessRequest far WHERE far.requestState=:requestState"), + @NamedQuery(name = "FileAccessRequest.findByAuthenticatedUserIdAndRequestState", + query = "SELECT far FROM FileAccessRequest far WHERE far.user.id=:authenticatedUserId and far.requestState=:requestState"), + @NamedQuery(name = "FileAccessRequest.findByGuestbookResponseIdAndRequestState", + query = "SELECT far FROM FileAccessRequest far WHERE far.guestbookResponse.id=:guestbookResponseId and far.requestState=:requestState"), + @NamedQuery(name = "FileAccessRequest.findByDataFileIdAndRequestState", + query = "SELECT far FROM FileAccessRequest far WHERE far.dataFile.id=:dataFileId and far.requestState=:requestState"), + @NamedQuery(name = "FileAccessRequest.findByAuthenticatedUserIdAndDataFileIdAndRequestState", + query = "SELECT far FROM FileAccessRequest far 
WHERE far.user.id=:authenticatedUserId and far.dataFile.id=:dataFileId and far.requestState=:requestState") +}) + + +public class FileAccessRequest implements Serializable{ + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + @ManyToOne - @MapsId("dataFile") - @JoinColumn(name = "datafile_id") + @JoinColumn(nullable=false) private DataFile dataFile; + @ManyToOne - @MapsId("authenticatedUser") - @JoinColumn(name = "authenticated_user_id") - private AuthenticatedUser authenticatedUser; - + @JoinColumn(name="authenticated_user_id",nullable=false) + private AuthenticatedUser user; + + @OneToOne + @JoinColumn(nullable=true) + private GuestbookResponse guestbookResponse; + + public enum RequestState {CREATED, GRANTED, REJECTED}; + //private RequestState state; + @Enumerated(EnumType.STRING) + @Column(name="request_state", nullable=false ) + private RequestState requestState; + @Temporal(value = TemporalType.TIMESTAMP) @Column(name = "creation_time") private Date creationTime; - - public FileAccessRequestKey getId() { + + public FileAccessRequest(){ + } + + public FileAccessRequest(DataFile df, AuthenticatedUser au){ + setDataFile(df); + setRequester(au); + setState(RequestState.CREATED); + setCreationTime(new Date()); + } + + public FileAccessRequest(DataFile df, AuthenticatedUser au, GuestbookResponse gbr){ + this(df, au); + setGuestbookResponse(gbr); + } + + public Long getId() { return id; } - public void setId(FileAccessRequestKey id) { + public void setId(Long id) { this.id = id; } - - public DataFile getDataFile() { + + public DataFile getDataFile(){ return dataFile; } + + public final void setDataFile(DataFile df){ + this.dataFile = df; + } + + public AuthenticatedUser getRequester(){ + return user; + } + + public final void setRequester(AuthenticatedUser au){ + this.user = au; + } + + public GuestbookResponse getGuestbookResponse(){ + return guestbookResponse; + } + + public final void setGuestbookResponse(GuestbookResponse gbr){ + this.guestbookResponse = gbr; + } + + public RequestState getState() { + return this.requestState; + } + + public void setState(RequestState requestState) { + this.requestState = requestState; + } + + public String getStateLabel() { + if(isStateCreated()){ + return "created"; + } + if(isStateGranted()) { + return "granted"; + } + if(isStateRejected()) { + return "rejected"; + } + return null; + } + + public void setStateCreated() { + this.requestState = RequestState.CREATED; + } + + public void setStateGranted() { + this.requestState = RequestState.GRANTED; + } - public void setDataFile(DataFile dataFile) { - this.dataFile = dataFile; + public void setStateRejected() { + this.requestState = RequestState.REJECTED; } - public AuthenticatedUser getAuthenticatedUser() { - return authenticatedUser; + public boolean isStateCreated() { + return this.requestState == RequestState.CREATED; + } + + public boolean isStateGranted() { + return this.requestState == RequestState.GRANTED; } - public void setAuthenticatedUser(AuthenticatedUser authenticatedUser) { - this.authenticatedUser = authenticatedUser; + public boolean isStateRejected() { + return this.requestState == RequestState.REJECTED; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? 
id.hashCode() : 0); + return hash; } public Date getCreationTime() { @@ -64,28 +182,19 @@ public Date getCreationTime() { public void setCreationTime(Date creationTime) { this.creationTime = creationTime; } - - @Embeddable - public static class FileAccessRequestKey implements Serializable { - @Column(name = "datafile_id") - private Long dataFile; - @Column(name = "authenticated_user_id") - private Long authenticatedUser; - - public Long getDataFile() { - return dataFile; + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof FileAccessRequest)) { + return false; } - - public void setDataFile(Long dataFile) { - this.dataFile = dataFile; - } - - public Long getAuthenticatedUser() { - return authenticatedUser; - } - - public void setAuthenticatedUser(Long authenticatedUser) { - this.authenticatedUser = authenticatedUser; + FileAccessRequest other = (FileAccessRequest) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; } + return true; } -} + + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/FileAccessRequestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileAccessRequestServiceBean.java new file mode 100644 index 00000000000..af8577fad34 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/FileAccessRequestServiceBean.java @@ -0,0 +1,87 @@ +package edu.harvard.iq.dataverse; + +import java.util.List; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; + +/** + * + * @author Marina + */ +@Stateless +@Named +public class FileAccessRequestServiceBean { + + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; + + public FileAccessRequest find(Object pk) { + return em.find(FileAccessRequest.class, pk); + } + + public List findAll() { + return em.createQuery("select object(o) from FileAccessRequest as o order by o.id", FileAccessRequest.class).getResultList(); + } + + public List findAll(Long authenticatedUserId, Long fileId, FileAccessRequest.RequestState requestState){ + return em.createNamedQuery("FileAccessRequest.findByAuthenticatedUserIdAndDataFileIdAndRequestState", FileAccessRequest.class) + .setParameter("authenticatedUserId",authenticatedUserId) + .setParameter("dataFileId",fileId) + .setParameter("requestState",requestState) + .getResultList(); + } + + public List findAllByAuthenticedUserId(Long authenticatedUserId){ + return em.createNamedQuery("FileAccessRequest.findByAuthenticatedUserId", FileAccessRequest.class) + .setParameter("authenticatedUserId", authenticatedUserId) + .getResultList(); + } + + public List findAllByGuestbookResponseId(Long guestbookResponseId){ + return em.createNamedQuery("FileAccessRequest.findByGuestbookResponseId", FileAccessRequest.class) + .setParameter("guestbookResponseId", guestbookResponseId) + .getResultList(); + + } + + public List findAllByDataFileId(Long dataFileId){ + return em.createNamedQuery("FileAccessRequest.findByDataFileId", FileAccessRequest.class) + .setParameter("dataFileId", dataFileId) + .getResultList(); + } + + public List findAllByAuthenticatedUserIdAndRequestState(Long authenticatedUserId, FileAccessRequest.RequestState requestState){ + return em.createNamedQuery("FileAccessRequest.findByAuthenticatedUserIdAndRequestState", FileAccessRequest.class) + .setParameter("authenticatedUserId", 
authenticatedUserId) + .setParameter("requestState",requestState) + .getResultList(); + } + + public List findAllByGuestbookResponseIdAndRequestState(Long guestbookResponseId, FileAccessRequest.RequestState requestState){ + return em.createNamedQuery("FileAccessRequest.findByGuestbookResponseIdAndRequestState", FileAccessRequest.class) + .setParameter("guestbookResponseId", guestbookResponseId) + .setParameter("requestState",requestState) + .getResultList(); + } + + public List findAllByDataFileIdAndRequestState(Long dataFileId, FileAccessRequest.RequestState requestState){ + return em.createNamedQuery("FileAccessRequest.findByDataFileIdAndRequestState", FileAccessRequest.class) + .setParameter("dataFileId", dataFileId) + .setParameter("requestState",requestState) + .getResultList(); + } + + + public FileAccessRequest save(FileAccessRequest far) { + if (far.getId() == null) { + em.persist(far); + return far; + } else { + return em.merge(far); + } + } + + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownload.java b/src/main/java/edu/harvard/iq/dataverse/FileDownload.java deleted file mode 100644 index a79281f71f0..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownload.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package edu.harvard.iq.dataverse; - -import java.io.Serializable; -import jakarta.persistence.Entity; -import jakarta.persistence.GeneratedValue; -import jakarta.persistence.GenerationType; -import jakarta.persistence.Id; -import jakarta.persistence.Temporal; -import jakarta.persistence.TemporalType; -import jakarta.persistence.Transient; -import jakarta.persistence.CascadeType; -import jakarta.persistence.OneToOne; -import jakarta.persistence.MapsId; -import jakarta.persistence.FetchType; -import jakarta.persistence.JoinColumn; -import java.util.Date; - - -/** - * - * @author marina - */ -@Entity -public class FileDownload implements Serializable { - - @Id - private Long id; - - @OneToOne(fetch = FetchType.LAZY) - @MapsId - private GuestbookResponse guestbookResponse; - - @Temporal(value = TemporalType.TIMESTAMP) - private Date downloadTimestamp; - - /* - Transient Values carry non-written information - that will assist in the download process - - selected file ids is a comma delimited list that contains the file ids for multiple download - - fileFormat tells the download api which format a subsettable file should be downloaded as - */ - - @Transient - private String selectedFileIds; - - @Transient - private String fileFormat; - - - /** - * Possible values for downloadType include "Download", "Subset", - * or the displayName of an ExternalTool. - * - * TODO: Types like "Download" and "Subset" should - * be defined once as constants (likely an enum) rather than having these - * strings duplicated in various places when setDownloadtype() is called.
- */ - private String downloadtype; - private String sessionId; - - public FileDownload(){ - - } - - public FileDownload(FileDownload source){ - this.setDownloadTimestamp(source.getDownloadTimestamp()); - this.setDownloadtype(source.getDownloadtype()); - this.setFileFormat(source.getFileFormat()); - this.setGuestbookResponse(source.getGuestbookResponse()); - this.setSelectedFileIds(source.getSelectedFileIds()); - this.setSessionId(source.getSessionId()); - } - - public String getFileFormat() { - return fileFormat; - } - - //for download - public void setFileFormat(String downloadFormat) { - this.fileFormat = downloadFormat; - } - - public String getDownloadtype() { - return downloadtype; - } - - public void setDownloadtype(String downloadtype) { - this.downloadtype = downloadtype; - } - - public String getSessionId() { - return sessionId; - } - - public void setSessionId(String sessionId) { - this.sessionId = sessionId; - } - - public String getSelectedFileIds() { - return selectedFileIds; - } - - public void setSelectedFileIds(String selectedFileIds) { - this.selectedFileIds = selectedFileIds; - } - - public Long getId() { - return id; - } - - public void setId(Long id) { - this.id = id; - } - - public Date getDownloadTimestamp(){ - return this.downloadTimestamp; - } - - public void setDownloadTimestamp(Date downloadTimestamp){ - this.downloadTimestamp = downloadTimestamp; - } - - - public void setGuestbookResponse(GuestbookResponse gbr){ - this.guestbookResponse = gbr; - } - - public GuestbookResponse getGuestbookResponse(){ - return this.guestbookResponse; - } - - @Override - public int hashCode() { - int hash = 0; - hash += (id != null ? id.hashCode() : 0); - return hash; - } - - @Override - public boolean equals(Object object) { - // TODO: Warning - this method won't work in the case the id fields are not set - if (!(object instanceof FileDownload)) { - return false; - } - FileDownload other = (FileDownload) object; - if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { - return false; - } - return true; - } - - @Override - public String toString() { - return "edu.harvard.iq.dataverse.FileDownload[ id=" + id + " ]"; - } - - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java index c4b4978e0f8..4d8100124ec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java @@ -9,6 +9,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.externaltools.ExternalTool; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; @@ -53,6 +54,9 @@ public class FileDownloadHelper implements java.io.Serializable { @EJB DataFileServiceBean datafileService; + + @EJB + GlobusServiceBean globusService; private final Map fileDownloadPermissionMap = new HashMap<>(); // { FileMetadata.id : Boolean } @@ -60,40 +64,40 @@ public FileDownloadHelper() { this.filesForRequestAccess = new ArrayList<>(); } - // See also @Size(max = 255) in GuestbookResponse - private boolean testResponseLength(String value) { - return !(value != null && value.length() > 255); - } - // This helper method is called from the Download terms/guestbook/etc. 
popup, // when the user clicks the "ok" button. We use it, instead of calling // downloadServiceBean directly, in order to differentiate between single - // file downloads and multiple (batch) downloads - sice both use the same + // file downloads and multiple (batch) downloads - since both use the same // terms/etc. popup. - public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse) { - PrimeFaces.current().executeScript("PF('downloadPopup').hide()"); - guestbookResponse.setDownloadtype("Download"); + public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse, boolean isGlobusTransfer) { + PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); + guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); // Note that this method is only ever called from the file-download-popup - // meaning we know for the fact that we DO want to save this // guestbookResponse permanently in the database. - if (guestbookResponse.getSelectedFileIds() != null) { - // this is a batch (multiple file) download. - // Although here's a chance that this is not really a batch download - i.e., - // there may only be one file on the file list. But the fileDownloadService - // method below will check for that, and will redirect to the single download, if - // that's the case. -- L.A. - fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse); - } else if (guestbookResponse.getDataFile() != null) { - // this a single file download: - fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse); + if(isGlobusTransfer) { + globusService.writeGuestbookAndStartTransfer(guestbookResponse, true); + } else { + if (guestbookResponse.getSelectedFileIds() != null) { + // this is a batch (multiple file) download. + // Although here's a chance that this is not really a batch download - i.e., + // there may only be one file on the file list. But the fileDownloadService + // method below will check for that, and will redirect to the single download, + // if + // that's the case. -- L.A. 
+ fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse); + } else if (guestbookResponse.getDataFile() != null) { + // this a single file download: + fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse); + } } } public void writeGuestbookAndOpenSubset(GuestbookResponse guestbookResponse) { - PrimeFaces.current().executeScript("PF('downloadPopup').hide()"); + PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); PrimeFaces.current().executeScript("PF('downloadDataSubsetPopup').show()"); - guestbookResponse.setDownloadtype("Subset"); + guestbookResponse.setEventType(GuestbookResponse.SUBSET); fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); } @@ -132,22 +136,33 @@ public void writeGuestbookAndLaunchExploreTool(GuestbookResponse guestbookRespon fileDownloadService.explore(guestbookResponse, fmd, externalTool); //requestContext.execute("PF('downloadPopup').hide()"); - PrimeFaces.current().executeScript("PF('downloadPopup').hide()"); + PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); } public void writeGuestbookAndLaunchPackagePopup(GuestbookResponse guestbookResponse) { - PrimeFaces.current().executeScript("PF('downloadPopup').hide()"); + PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); PrimeFaces.current().executeScript("PF('downloadPackagePopup').show()"); PrimeFaces.current().executeScript("handleResizeDialog('downloadPackagePopup')"); fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); } + + public void writeGuestbookResponseAndRequestAccess(GuestbookResponse guestbookResponse) { + + if(!filesForRequestAccess.isEmpty()) { + /* Only for single file requests (i.e. from kebab menu) */ + guestbookResponse.setDataFile(filesForRequestAccess.get(0)); + } + PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); + fileDownloadService.writeGuestbookResponseAndRequestAccess(guestbookResponse); + } + /** * Writes a guestbook entry for either popup scenario: guestbook or terms. 
*/ public boolean writeGuestbookAndShowPreview(GuestbookResponse guestbookResponse) { - guestbookResponse.setDownloadtype("Explore"); + guestbookResponse.setEventType(GuestbookResponse.EXPLORE); fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); return true; } @@ -284,7 +299,7 @@ public void handleCommandLinkClick(FileMetadata fmd){ if (FileUtil.isRequestAccessPopupRequired(fmd.getDatasetVersion())){ addFileForRequestAccess(fmd.getDataFile()); - PrimeFaces.current().executeScript("PF('requestAccessPopup').show()"); + PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').show();handleResizeDialog('guestbookAndTermsPopup');"); } else { requestAccess(fmd.getDataFile()); } @@ -299,7 +314,7 @@ public void requestAccessMultiple(List files) { DataFile notificationFile = null; for (DataFile file : files) { //Not sending notification via request method so that - // we can bundle them up into one nofication at dataset level + // we can bundle them up into one notification at dataset level test = processRequestAccess(file, false); succeeded |= test; if (notificationFile == null) { @@ -307,13 +322,15 @@ public void requestAccessMultiple(List files) { } } if (notificationFile != null && succeeded) { - fileDownloadService.sendRequestFileAccessNotification(notificationFile, (AuthenticatedUser) session.getUser()); + fileDownloadService.sendRequestFileAccessNotification(notificationFile.getOwner(), + notificationFile.getId(), (AuthenticatedUser) session.getUser()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("file.accessRequested.success")); } } public void requestAccessIndirect() { //Called when there are multiple files and no popup - // or there's a popup with sigular or multiple files + // or there's a popup with singular or multiple files // The list of files for Request Access is set in the Dataset Page when // user clicks the request access button in the files fragment // (and has selected one or more files) @@ -325,13 +342,15 @@ private boolean processRequestAccess(DataFile file, Boolean sendNotification) { if (fileDownloadService.requestAccess(file.getId())) { // update the local file object so that the page properly updates AuthenticatedUser user = (AuthenticatedUser) session.getUser(); - file.addFileAccessRequester(user); + //This seems to be required because we don't get the updated file object back from the command called in the fileDownloadService.requestAccess call above + FileAccessRequest request = new FileAccessRequest(file, user); + file.addFileAccessRequest(request); // create notification if necessary if (sendNotification) { - fileDownloadService.sendRequestFileAccessNotification(file, user); - } - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("file.accessRequested.success")); + fileDownloadService.sendRequestFileAccessNotification(file.getOwner(), file.getId(), (AuthenticatedUser) session.getUser()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("file.accessRequested.success")); + } return true; } JsfHelper.addWarningMessage(BundleUtil.getStringFromBundle("file.accessRequested.alreadyRequested", Arrays.asList(file.getDisplayName()))); diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 13bfccb60ce..8688e4159fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -4,10 +4,10 @@ import 
edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.CreateGuestbookResponseCommand; import edu.harvard.iq.dataverse.engine.command.impl.RequestAccessCommand; @@ -15,11 +15,13 @@ import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; -import edu.harvard.iq.dataverse.privateurl.PrivateUrl; -import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; @@ -72,9 +74,9 @@ public class FileDownloadServiceBean implements java.io.Serializable { @EJB AuthenticationServiceBean authService; @EJB - PrivateUrlServiceBean privateUrlService; - @EJB SettingsServiceBean settingsService; + @EJB + MailServiceBean mailService; @Inject DataverseSession session; @@ -191,6 +193,42 @@ public void writeGuestbookAndStartFileDownload(GuestbookResponse guestbookRespon redirectToDownloadAPI(guestbookResponse.getFileFormat(), guestbookResponse.getDataFile().getId()); logger.fine("issued file download redirect for datafile "+guestbookResponse.getDataFile().getId()); } + + public void writeGuestbookResponseAndRequestAccess(GuestbookResponse guestbookResponse){ + if (guestbookResponse == null || ( guestbookResponse.getDataFile() == null && guestbookResponse.getSelectedFileIds() == null) ) { + return; + } + + guestbookResponse.setEventType(GuestbookResponse.ACCESS_REQUEST); + + List selectedDataFiles = new ArrayList<>(); //always make sure it's at least an empty List + + if(guestbookResponse.getDataFile() != null ){ //one file 'selected' by 'Request Access' button click + selectedDataFiles.add(datafileService.find(guestbookResponse.getDataFile().getId())); //don't want the findCheapAndEasy + } + + if(guestbookResponse.getSelectedFileIds() != null && !guestbookResponse.getSelectedFileIds().isEmpty()) { //multiple selected through multi-select REquest Access button + selectedDataFiles = datafileService.findAll(guestbookResponse.getSelectedFileIds()); + } + + int countRequestAccessSuccess = 0; + + for(DataFile dataFile : selectedDataFiles){ + guestbookResponse.setDataFile(dataFile); + writeGuestbookResponseRecordForRequestAccess(guestbookResponse); + if(requestAccess(dataFile,guestbookResponse)){ + countRequestAccessSuccess++; + } else { + JsfHelper.addWarningMessage(BundleUtil.getStringFromBundle("file.accessRequested.alreadyRequested", Arrays.asList(dataFile.getDisplayName()))); + } + } + + if(countRequestAccessSuccess > 0){ + DataFile firstDataFile = selectedDataFiles.get(0); + 
sendRequestFileAccessNotification(firstDataFile.getOwner(), firstDataFile.getId(), (AuthenticatedUser) session.getUser()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("file.accessRequested.success")); + } + } public void writeGuestbookResponseRecord(GuestbookResponse guestbookResponse, FileMetadata fileMetadata, String format) { if(!fileMetadata.getDatasetVersion().isDraft()){ @@ -220,6 +258,18 @@ public void writeGuestbookResponseRecord(GuestbookResponse guestbookResponse) { } } + public void writeGuestbookResponseRecordForRequestAccess(GuestbookResponse guestbookResponse) { + try { + CreateGuestbookResponseCommand cmd = new CreateGuestbookResponseCommand(dvRequestService.getDataverseRequest(), guestbookResponse, guestbookResponse.getDataset()); + commandEngine.submit(cmd); + + } catch (CommandException e) { + //if an error occurs here then download won't happen no need for response recs... + logger.info("Failed to writeGuestbookResponseRecord for RequestAccess"); + } + + } + // The "guestBookRecord(s)AlreadyWritten" parameter in the 2 methods // below (redirectToBatchDownloadAPI() and redirectToDownloadAPI(), for the // multiple- and single-file downloads respectively) are passed to the @@ -262,13 +312,19 @@ private void redirectToCustomZipDownloadService(String customZipServiceUrl, Stri } } - private void redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, Long fileMetadataId) { - String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, fileMetadataId); - logger.fine("Redirecting to file download url: " + fileDownloadUrl); - try { - FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl); - } catch (IOException ex) { - logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex); + private void redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, + Long fileMetadataId) { + String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, + fileMetadataId); + if ("GlobusTransfer".equals(downloadType)) { + PrimeFaces.current().executeScript(URLTokenUtil.getScriptForUrl(fileDownloadUrl)); + } else { + logger.fine("Redirecting to file download url: " + fileDownloadUrl); + try { + FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl); + } catch (IOException ex) { + logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex); + } } } @@ -299,7 +355,7 @@ public void explore(GuestbookResponse guestbookResponse, FileMetadata fmd, Exter User user = session.getUser(); DatasetVersion version = fmd.getDatasetVersion(); if (version.isDraft() || fmd.getDatasetVersion().isDeaccessioned() || (fmd.getDataFile().isRestricted()) || (FileUtil.isActivelyEmbargoed(fmd))) { - apiToken = getApiToken(user); + apiToken = authService.getValidApiTokenForUser(user); } DataFile dataFile = null; if (fmd != null) { @@ -312,7 +368,7 @@ public void explore(GuestbookResponse guestbookResponse, FileMetadata fmd, Exter String localeCode = session.getLocaleCode(); ExternalToolHandler externalToolHandler = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, localeCode); // Persist the name of the tool (i.e. "Data Explorer", etc.) 
- guestbookResponse.setDownloadtype(externalTool.getDisplayName()); + guestbookResponse.setEventType(externalTool.getDisplayName()); PrimeFaces.current().executeScript(externalToolHandler.getExploreScript()); // This is the old logic from TwoRavens, null checks and all. if (guestbookResponse != null && guestbookResponse.isWriteResponse() @@ -326,24 +382,6 @@ public void explore(GuestbookResponse guestbookResponse, FileMetadata fmd, Exter } } - public ApiToken getApiToken(User user) { - ApiToken apiToken = null; - if (user instanceof AuthenticatedUser) { - AuthenticatedUser authenticatedUser = (AuthenticatedUser) user; - apiToken = authService.findApiTokenByUser(authenticatedUser); - if (apiToken == null || apiToken.isExpired()) { - //No un-expired token - apiToken = authService.generateApiTokenForUser(authenticatedUser); - } - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privateUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privateUrl.getToken()); - } - return apiToken; - } - public void downloadDatasetCitationXML(Dataset dataset) { downloadCitationXML(null, dataset, false); } @@ -530,7 +568,7 @@ public boolean requestAccess(Long fileId) { return false; } DataFile file = datafileService.find(fileId); - if (!file.containsFileAccessRequestFromUser(session.getUser())) { + if (!file.containsActiveFileAccessRequestFromUser(session.getUser())) { try { commandEngine.submit(new RequestAccessCommand(dvRequestService.getDataverseRequest(), file)); return true; @@ -540,12 +578,33 @@ public boolean requestAccess(Long fileId) { } } return false; - } + } - public void sendRequestFileAccessNotification(DataFile datafile, AuthenticatedUser requestor) { - permissionService.getUsersWithPermissionOn(Permission.ManageFilePermissions, datafile).stream().forEach((au) -> { - userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.REQUESTFILEACCESS, datafile.getId(), null, requestor, false); + public boolean requestAccess(DataFile dataFile, GuestbookResponse gbr){ + boolean accessRequested = false; + if (dvRequestService.getDataverseRequest().getAuthenticatedUser() == null){ + return accessRequested; + } + + if(!dataFile.containsActiveFileAccessRequestFromUser(session.getUser())) { + try { + commandEngine.submit(new RequestAccessCommand(dvRequestService.getDataverseRequest(), dataFile, gbr)); + accessRequested = true; + } catch (CommandException ex) { + logger.info("Unable to request access for file id " + dataFile.getId() + ". 
Exception: " + ex); + } + } + + return accessRequested; + } + + public void sendRequestFileAccessNotification(Dataset dataset, Long fileId, AuthenticatedUser requestor) { + Timestamp ts = new Timestamp(new Date().getTime()); + permissionService.getUsersWithPermissionOn(Permission.ManageDatasetPermissions, dataset).stream().forEach((au) -> { + userNotificationService.sendNotification(au, ts, UserNotification.Type.REQUESTFILEACCESS, fileId, null, requestor, true); }); + //send the user that requested access a notification that they requested the access + userNotificationService.sendNotification(requestor, ts, UserNotification.Type.REQUESTEDFILEACCESS, fileId, null, requestor, true); } @@ -613,5 +672,4 @@ public String getDirectStorageLocatrion(String storageLocation) { return null; } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index 49c904c3ac3..dcb27b7c31b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -21,6 +21,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand; import edu.harvard.iq.dataverse.engine.command.impl.PersistProvFreeFormCommand; import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; +import edu.harvard.iq.dataverse.engine.command.impl.UningestFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.export.ExportService; import io.gdcc.spi.export.ExportException; @@ -28,6 +29,8 @@ import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; +import edu.harvard.iq.dataverse.ingest.IngestRequest; +import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; @@ -35,6 +38,8 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; +import edu.harvard.iq.dataverse.util.StringUtil; + import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -45,6 +50,7 @@ import java.util.Comparator; import java.util.List; import java.util.Set; +import java.util.logging.Level; import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.ejb.EJBException; @@ -112,10 +118,10 @@ public class FilePage implements java.io.Serializable { GuestbookResponseServiceBean guestbookResponseService; @EJB AuthenticationServiceBean authService; - @EJB DatasetServiceBean datasetService; - + @EJB + IngestServiceBean ingestService; @EJB SystemConfig systemConfig; @@ -209,7 +215,7 @@ public String init() { // If this DatasetVersion is unpublished and permission is doesn't have permissions: // > Go to the Login page // - // Check permisisons + // Check permissions Boolean authorized = (fileMetadata.getDatasetVersion().isReleased()) || (!fileMetadata.getDatasetVersion().isReleased() && this.canViewUnpublishedDataset()); @@ -325,6 +331,20 @@ private List sortExternalTools(){ Collections.sort(retList, CompareExternalToolName); return retList; } + + private String termsGuestbookPopupAction = ""; + + public void 
setTermsGuestbookPopupAction(String popupAction){ + if(popupAction != null && popupAction.length() > 0){ + logger.info("TGPA set to " + popupAction); + this.termsGuestbookPopupAction = popupAction; + } + + } + + public String getTermsGuestbookPopupAction(){ + return termsGuestbookPopupAction; + } public boolean isDownloadPopupRequired() { if(fileMetadata.getId() == null || fileMetadata.getDatasetVersion().getId() == null ){ @@ -340,6 +360,18 @@ public boolean isRequestAccessPopupRequired() { return FileUtil.isRequestAccessPopupRequired(fileMetadata.getDatasetVersion()); } + public boolean isGuestbookAndTermsPopupRequired() { + if(fileMetadata.getId() == null || fileMetadata.getDatasetVersion().getId() == null ){ + return false; + } + return FileUtil.isGuestbookAndTermsPopupRequired(fileMetadata.getDatasetVersion()); + } + + public boolean isGuestbookPopupRequiredAtDownload(){ + // Only show guestbookAtDownload if guestbook at request is disabled (legacy behavior) + DatasetVersion workingVersion = fileMetadata.getDatasetVersion(); + return FileUtil.isGuestbookPopupRequired(workingVersion) && !workingVersion.getDataset().getEffectiveGuestbookEntryAtRequest(); + } public void setFileMetadata(FileMetadata fileMetadata) { this.fileMetadata = fileMetadata; @@ -449,6 +481,120 @@ public String restrictFile(boolean restricted) throws CommandException{ return returnToDraftVersion(); } + public String ingestFile() throws CommandException{ + + User u = session.getUser(); + if(!u.isAuthenticated() || !u.isSuperuser()) { + //Shouldn't happen (choice not displayed for users who don't have the right permission), but check anyway + logger.warning("User: " + u.getIdentifier() + " tried to ingest a file"); + JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("file.ingest.cantIngestFileWarning")); + return null; + } + + DataFile dataFile = fileMetadata.getDataFile(); + editDataset = dataFile.getOwner(); + + if (dataFile.isTabularData()) { + JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("file.ingest.alreadyIngestedWarning")); + return null; + } + + boolean ingestLock = dataset.isLockedFor(DatasetLock.Reason.Ingest); + + if (ingestLock) { + JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("file.ingest.ingestInProgressWarning")); + return null; + } + + if (!FileUtil.canIngestAsTabular(dataFile)) { + JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("file.ingest.cantIngestFileWarning")); + return null; + + } + + dataFile.SetIngestScheduled(); + + if (dataFile.getIngestRequest() == null) { + dataFile.setIngestRequest(new IngestRequest(dataFile)); + } + + dataFile.getIngestRequest().setForceTypeCheck(true); + + // update the datafile, to save the newIngest request in the database: + datafileService.save(file); + + // queue the data ingest job for asynchronous execution: + String status = ingestService.startIngestJobs(editDataset.getId(), new ArrayList<>(Arrays.asList(dataFile)), (AuthenticatedUser) session.getUser()); + + if (!StringUtil.isEmpty(status)) { + // This most likely indicates some sort of a problem (for example, + // the ingest job was not put on the JMS queue because of the size + // of the file). But we are still returning the OK status - because + // from the point of view of the API, it's a success - we have + // successfully gone through the process of trying to schedule the + // ingest job... 
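// Summary of the guards above (hedged reading of this method): ingest is only (re)scheduled when
// the caller is an authenticated superuser, the file is not already tabular, the dataset is not
// locked for DatasetLock.Reason.Ingest, and FileUtil.canIngestAsTabular(dataFile) accepts the file
// type. A non-empty status string returned by startIngestJobs() is logged as a warning but does
// not fail the request, since the job was still handed off for asynchronous execution.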
+ + logger.warning("Ingest Status for file: " + dataFile.getId() + " : " + status); + } + logger.fine("File: " + dataFile.getId() + " ingest queued"); + + init(); + JsfHelper.addInfoMessage(BundleUtil.getStringFromBundle("file.ingest.ingestQueued")); + return returnToDraftVersion(); + } + + public String uningestFile() throws CommandException { + + if (!file.isTabularData()) { + //Ingest never succeeded, either there was a failure or this is not a tabular data file + if (file.isIngestProblem()) { + //We allow anyone who can publish to uningest in order to clear a problem + User u = session.getUser(); + if (!u.isAuthenticated() || !(permissionService.permissionsFor(u, file).contains(Permission.PublishDataset))) { + logger.warning("User: " + u.getIdentifier() + " tried to uningest a file"); + // Shouldn't happen (choice not displayed for users who don't have the right + // permission), but check anyway + JH.addMessage(FacesMessage.SEVERITY_WARN, + BundleUtil.getStringFromBundle("file.ingest.cantUningestFileWarning")); + return null; + } + file.setIngestDone(); + file.setIngestReport(null); + } else { + //Shouldn't happen - got called when there is no tabular data or an ingest problem + JH.addMessage(FacesMessage.SEVERITY_WARN, + BundleUtil.getStringFromBundle("file.ingest.cantUningestFileWarning")); + return null; + } + } else { + //Superuser required to uningest after a success + //Uningest command does it's own check for isSuperuser + commandEngine.submit(new UningestFileCommand(dvRequestService.getDataverseRequest(), file)); + Long dataFileId = file.getId(); + file = datafileService.find(dataFileId); + } + editDataset = file.getOwner(); + if (editDataset.isReleased()) { + try { + ExportService instance = ExportService.getInstance(); + instance.exportAllFormats(editDataset); + + } catch (ExportException ex) { + // Something went wrong! + // Just like with indexing, a failure to export is not a fatal + // condition. We'll just log the error as a warning and keep + // going: + logger.log(Level.WARNING, "Uningest: Exception while exporting:{0}", ex.getMessage()); + } + } + datafileService.save(file); + + // Refresh filemetadata with file title, etc. 
+ init(); + JH.addMessage(FacesMessage.SEVERITY_INFO, BundleUtil.getStringFromBundle("file.uningest.complete")); + return returnToDraftVersion(); + } + private List filesToBeDeleted = new ArrayList<>(); public String deleteFile() { @@ -922,6 +1068,12 @@ public boolean isPubliclyDownloadable() { return FileUtil.isPubliclyDownloadable(fileMetadata); } + public boolean isIngestable() { + DataFile f = fileMetadata.getDataFile(); + //Datafile is an ingestable type and hasn't been ingested yet or had an ingest fail + return (FileUtil.canIngestAsTabular(f)&&!(f.isTabularData() || f.isIngestProblem())); + } + private Boolean lockedFromEditsVar; private Boolean lockedFromDownloadVar; @@ -1043,7 +1195,7 @@ public String preview(ExternalTool externalTool) { ApiToken apiToken = null; User user = session.getUser(); if (fileMetadata.getDatasetVersion().isDraft() || fileMetadata.getDatasetVersion().isDeaccessioned() || (fileMetadata.getDataFile().isRestricted()) || (FileUtil.isActivelyEmbargoed(fileMetadata))) { - apiToken=fileDownloadService.getApiToken(user); + apiToken=authService.getValidApiTokenForUser(user); } if(externalTool == null){ return ""; @@ -1245,6 +1397,15 @@ public String getIngestMessage() { public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(DataAccess.getStorageDriverFromIdentifier(file.getStorageIdentifier()))); } + + //Allows use of fileDownloadHelper in file.xhtml + public FileDownloadHelper getFileDownloadHelper() { + return fileDownloadHelper; + } + + public void setFileDownloadHelper(FileDownloadHelper fileDownloadHelper) { + this.fileDownloadHelper = fileDownloadHelper; + } /** * This method only exists because in file-edit-button-fragment.xhtml we diff --git a/src/main/java/edu/harvard/iq/dataverse/FileSearchCriteria.java b/src/main/java/edu/harvard/iq/dataverse/FileSearchCriteria.java new file mode 100644 index 00000000000..62f10c18bdf --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/FileSearchCriteria.java @@ -0,0 +1,45 @@ +package edu.harvard.iq.dataverse; + +public class FileSearchCriteria { + + private final String contentType; + private final FileAccessStatus accessStatus; + private final String categoryName; + private final String tabularTagName; + private final String searchText; + + /** + * Status of the particular DataFile based on active embargoes and restriction state + */ + public enum FileAccessStatus { + Public, Restricted, EmbargoedThenRestricted, EmbargoedThenPublic + } + + public FileSearchCriteria(String contentType, FileAccessStatus accessStatus, String categoryName, String tabularTagName, String searchText) { + this.contentType = contentType; + this.accessStatus = accessStatus; + this.categoryName = categoryName; + this.tabularTagName = tabularTagName; + this.searchText = searchText; + } + + public String getContentType() { + return contentType; + } + + public FileAccessStatus getAccessStatus() { + return accessStatus; + } + + public String getCategoryName() { + return categoryName; + } + + public String getTabularTagName() { + return tabularTagName; + } + + public String getSearchText() { + return searchText; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index 890b146a61c..a542cb52ac0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -6,7 +6,7 @@ package edu.harvard.iq.dataverse; -import 
edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; import edu.harvard.iq.dataverse.util.BundleUtil; import static edu.harvard.iq.dataverse.util.StringUtil.isEmpty; import java.net.MalformedURLException; @@ -33,7 +33,7 @@ public GlobalId(String protocol, String authority, String identifier, String sep this.separator = separator; } this.urlPrefix = urlPrefix; - this.managingProviderName = providerName; + this.managingProviderId = providerName; } // protocol the identifier system, e.g. "doi" @@ -42,7 +42,7 @@ public GlobalId(String protocol, String authority, String identifier, String sep private String protocol; private String authority; private String identifier; - private String managingProviderName; + private String managingProviderId; private String separator = "/"; private String urlPrefix; @@ -67,8 +67,8 @@ public String getIdentifier() { return identifier; } - public String getProvider() { - return managingProviderName; + public String getProviderId() { + return managingProviderId; } public String toString() { diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java index 9fb584a9133..f54b1fb6117 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java @@ -288,19 +288,21 @@ public String save() { Command cmd; try { + // Per recent #dv-tech conversation w/ Jim - copying the code + // below from his QDR branch; the code that used to be here called + // UpdateDataverseCommand when saving new guestbooks, and that involved + // an unnecessary reindexing of the dataverse (and, in some cases, + // reindexing of the underlying datasets). - L.A. 
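// Illustrative sketch only (not part of this patch), relating to the GlobalId hunk a little
// further above: the accessor rename from getProvider() to getProviderId(). The argument values
// below are invented for the example; the constructor is the six-argument one shown in that hunk.
//
//     GlobalId pid = new GlobalId("doi", "10.5072", "FK2/ABC123", "/", "https://doi.org/", "datacite");
//     String providerId = pid.getProviderId();   // previously getProvider()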
if (editMode == EditMode.CREATE || editMode == EditMode.CLONE ) { guestbook.setCreateTime(new Timestamp(new Date().getTime())); - guestbook.setUsageCount(new Long(0)); + guestbook.setUsageCount(Long.valueOf(0)); guestbook.setEnabled(true); dataverse.getGuestbooks().add(guestbook); - cmd = new UpdateDataverseCommand(dataverse, null, null, dvRequestService.getDataverseRequest(), null); - commandEngine.submit(cmd); create = true; - } else { - cmd = new UpdateDataverseGuestbookCommand(dataverse, guestbook, dvRequestService.getDataverseRequest()); - commandEngine.submit(cmd); - } - + } + cmd = new UpdateDataverseGuestbookCommand(dataverse, guestbook, dvRequestService.getDataverseRequest()); + commandEngine.submit(cmd); + } catch (EJBException ex) { StringBuilder error = new StringBuilder(); error.append(ex).append(" "); diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java index 0057fbeddab..9041ccf887c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java @@ -8,6 +8,8 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.externaltools.ExternalTool; +import edu.harvard.iq.dataverse.util.BundleUtil; + import java.io.Serializable; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -65,8 +67,9 @@ public class GuestbookResponse implements Serializable { @JoinColumn(nullable=true) private AuthenticatedUser authenticatedUser; - @OneToOne(cascade=CascadeType.ALL,mappedBy="guestbookResponse",fetch = FetchType.LAZY, optional = false) - private FileDownload fileDownload; + @OneToMany(mappedBy="guestbookResponse",cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST},fetch = FetchType.LAZY) + //private FileAccessRequest fileAccessRequest; + private List fileAccessRequests; @OneToMany(mappedBy="guestbookResponse",cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST},orphanRemoval=true) @OrderBy ("id") @@ -87,16 +90,37 @@ public class GuestbookResponse implements Serializable { @Temporal(value = TemporalType.TIMESTAMP) private Date responseTime; + + private String sessionId; + private String eventType; + + /** Event Types - there are four pre-defined values in use. + * The type can also be the name of a previewer/explore tool + */ + public static final String ACCESS_REQUEST = "AccessRequest"; + public static final String DOWNLOAD = "Download"; + static final String SUBSET = "Subset"; + static final String EXPLORE = "Explore"; + /* Transient Values carry non-written information that will assist in the download process - writeResponse is set to false when dataset version is draft. 
+ - selected file ids is a comma delimited list that contains the file ids for multiple download + - fileFormat tells the download api which format a subsettable file should be downloaded as + */ @Transient private boolean writeResponse = true; + @Transient + private String selectedFileIds; + + @Transient + private String fileFormat; + /** * This transient variable is a place to temporarily retrieve the * ExternalTool object from the popup when the popup is required on the @@ -105,6 +129,7 @@ public class GuestbookResponse implements Serializable { @Transient private ExternalTool externalTool; + public boolean isWriteResponse() { return writeResponse; } @@ -114,19 +139,19 @@ public void setWriteResponse(boolean writeResponse) { } public String getSelectedFileIds(){ - return this.fileDownload.getSelectedFileIds(); + return this.selectedFileIds; } public void setSelectedFileIds(String selectedFileIds) { - this.fileDownload.setSelectedFileIds(selectedFileIds); + this.selectedFileIds = selectedFileIds; } public String getFileFormat() { - return this.fileDownload.getFileFormat(); + return this.fileFormat; } public void setFileFormat(String downloadFormat) { - this.fileDownload.setFileFormat(downloadFormat); + this.fileFormat = downloadFormat; } public ExternalTool getExternalTool() { @@ -138,10 +163,6 @@ public void setExternalTool(ExternalTool externalTool) { } public GuestbookResponse(){ - if(this.getFileDownload() == null){ - this.fileDownload = new FileDownload(); - this.fileDownload.setGuestbookResponse(this); - } } public GuestbookResponse(GuestbookResponse source){ @@ -154,7 +175,7 @@ public GuestbookResponse(GuestbookResponse source){ this.setDataset(source.getDataset()); this.setDatasetVersion(source.getDatasetVersion()); this.setAuthenticatedUser(source.getAuthenticatedUser()); - + this.setSessionId(source.getSessionId()); List customQuestionResponses = new ArrayList<>(); if (!source.getCustomQuestionResponses().isEmpty()){ for (CustomQuestionResponse customQuestionResponse : source.getCustomQuestionResponses() ){ @@ -167,7 +188,6 @@ public GuestbookResponse(GuestbookResponse source){ } this.setCustomQuestionResponses(customQuestionResponses); this.setGuestbook(source.getGuestbook()); - this.setFileDownload(source.getFileDownload()); } @@ -225,17 +245,11 @@ public Date getResponseTime() { public void setResponseTime(Date responseTime) { this.responseTime = responseTime; - this.getFileDownload().setDownloadTimestamp(responseTime); } public String getResponseDate() { return new SimpleDateFormat("MMMM d, yyyy").format(responseTime); } - - public String getResponseDateForDisplay(){ - return null; // SimpleDateFormat("yyyy").format(new Timestamp(new Date().getTime())); - } - public List getCustomQuestionResponses() { return customQuestionResponses; @@ -245,15 +259,14 @@ public void setCustomQuestionResponses(List customQuesti this.customQuestionResponses = customQuestionResponses; } - public FileDownload getFileDownload(){ - return fileDownload; + public List getFileAccessRequests(){ + return fileAccessRequests; } - - public void setFileDownload(FileDownload fDownload){ - this.fileDownload = fDownload; + + public void setFileAccessRequest(List fARs){ + this.fileAccessRequests = fARs; } - public Dataset getDataset() { return dataset; } @@ -286,22 +299,55 @@ public void setAuthenticatedUser(AuthenticatedUser authenticatedUser) { this.authenticatedUser = authenticatedUser; } - public String getDownloadtype() { - return this.fileDownload.getDownloadtype(); + public String getEventType() { + 
return this.eventType; } - public void setDownloadtype(String downloadtype) { - this.fileDownload.setDownloadtype(downloadtype); + public void setEventType(String eventType) { + this.eventType = eventType; } public String getSessionId() { - return this.fileDownload.getSessionId(); + return this.sessionId; } public void setSessionId(String sessionId) { - this.fileDownload.setSessionId(sessionId); + this.sessionId= sessionId; + } +
+ public String toHtmlFormattedResponse() { + + StringBuilder sb = new StringBuilder(); + + sb.append(BundleUtil.getStringFromBundle("dataset.guestbookResponse.id") + ": " + getId() + "<br>\n"); + sb.append(BundleUtil.getStringFromBundle("dataset.guestbookResponse.date") + ": " + getResponseDate() + "<br>\n"); + sb.append(BundleUtil.getStringFromBundle("dataset.guestbookResponse.respondent") + "<br>\n<ul>\n<li>" + + BundleUtil.getStringFromBundle("name") + ": " + getName() + "</li>\n<li>"); + sb.append(" " + BundleUtil.getStringFromBundle("email") + ": " + getEmail() + "</li>\n<li>"); + sb.append( + " " + BundleUtil.getStringFromBundle("institution") + ": " + wrapNullAnswer(getInstitution()) + "</li>\n<li>"); + sb.append(" " + BundleUtil.getStringFromBundle("position") + ": " + wrapNullAnswer(getPosition()) + "</li></ul><br>\n"); + sb.append(BundleUtil.getStringFromBundle("dataset.guestbookResponse.guestbook.additionalQuestions") + + ":<br>\n<ul>\n"); + + for (CustomQuestionResponse cqr : getCustomQuestionResponses()) { + sb.append("<li>" + BundleUtil.getStringFromBundle("dataset.guestbookResponse.question") + ": " + + cqr.getCustomQuestion().getQuestionString() + "<br>" + + BundleUtil.getStringFromBundle("dataset.guestbookResponse.answer") + ": " + + wrapNullAnswer(cqr.getResponse()) + "</li>\n"); + } + sb.append("</ul>"); + return sb.toString(); + } +
+ private String wrapNullAnswer(String answer) { + //This assumes we don't have to distinguish null from when the user actually answers "(No Response)". The db still has the real value + if (answer == null) { + return BundleUtil.getStringFromBundle("dataset.guestbookResponse.noResponse"); + } + return answer; } @Override
diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index bd598d2dca0..6c043b78941 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java
@@ -63,15 +63,14 @@ public class GuestbookResponseServiceBean { + " and r.dataset_id = o.id " + " and r.guestbook_id = g.id ";*/ - private static final String BASE_QUERY_STRING_FOR_DOWNLOAD_AS_CSV = "select r.id, g.name, o.id, r.responsetime, f.downloadtype," + private static final String BASE_QUERY_STRING_FOR_DOWNLOAD_AS_CSV = "select r.id, g.name, o.id, r.responsetime, r.eventtype," + " m.label, r.dataFile_id, r.name, r.email, r.institution, r.position," + " o.protocol, o.authority, o.identifier, d.protocol, d.authority, d.identifier " - + "from guestbookresponse r, filedownload f, filemetadata m, dvobject o, guestbook g, dvobject d " + + "from guestbookresponse r, filemetadata m, dvobject o, guestbook g, dvobject d " + "where " + "m.datasetversion_id = (select max(datasetversion_id) from filemetadata where datafile_id =r.datafile_id ) " + " and m.datafile_id = r.datafile_id " + " and d.id = r.datafile_id " - + " and r.id = f.guestbookresponse_id " + " and r.dataset_id = o.id " + " and r.guestbook_id = g.id ";
@@ -79,14 +78,13 @@ public class GuestbookResponseServiceBean { // on the guestbook-results.xhtml page (the info we show on the page is // less detailed than what we let the users download as CSV files, so this // query has fewer fields than the one above). -- L.A.
- private static final String BASE_QUERY_STRING_FOR_PAGE_DISPLAY = "select r.id, v.value, r.responsetime, f.downloadtype, m.label, r.name " - + "from guestbookresponse r, filedownload f, datasetfieldvalue v, filemetadata m , dvobject o " + private static final String BASE_QUERY_STRING_FOR_PAGE_DISPLAY = "select r.id, v.value, r.responsetime, r.eventtype, m.label, r.name " + + "from guestbookresponse r, datasetfieldvalue v, filemetadata m , dvobject o " + "where " + " v.datasetfield_id = (select id from datasetfield f where datasetfieldtype_id = 1 " + " and datasetversion_id = (select max(id) from datasetversion where dataset_id =r.dataset_id )) " + " and m.datasetversion_id = (select max(datasetversion_id) from filemetadata where datafile_id =r.datafile_id ) " + " and m.datafile_id = r.datafile_id " - + " and r.id = f.guestbookresponse_id " + " and r.dataset_id = o.id "; // And a custom query for retrieving *all* the custom question responses, for @@ -434,7 +432,7 @@ public Long findCountByGuestbookId(Long guestbookId, Long dataverseId) { Query query = em.createNativeQuery(queryString); return (Long) query.getSingleResult(); } else { - String queryString = "select count(o) from GuestbookResponse as o, Dataset d, DvObject obj where o.dataset_id = d.id and d.id = obj.id and obj.owner_id = " + dataverseId + "and o.guestbook_id = " + guestbookId; + String queryString = "select count(o) from GuestbookResponse as o, Dataset d, DvObject obj where o.dataset_id = d.id and d.id = obj.id and obj.owner_id = " + dataverseId + " and o.guestbook_id = " + guestbookId; Query query = em.createNativeQuery(queryString); return (Long) query.getSingleResult(); } @@ -641,6 +639,9 @@ public GuestbookResponse initGuestbookResponseForFragment(DatasetVersion working GuestbookResponse guestbookResponse = new GuestbookResponse(); + //Not otherwise set for multi-file downloads + guestbookResponse.setDatasetVersion(workingVersion); + if(workingVersion.isDraft()){ guestbookResponse.setWriteResponse(false); } @@ -667,7 +668,7 @@ public GuestbookResponse initGuestbookResponseForFragment(DatasetVersion working if (dataset.getGuestbook() != null && !dataset.getGuestbook().getCustomQuestions().isEmpty()) { initCustomQuestions(guestbookResponse, dataset); } - guestbookResponse.setDownloadtype("Download"); + guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); guestbookResponse.setDataset(dataset); @@ -721,9 +722,9 @@ public GuestbookResponse initGuestbookResponse(FileMetadata fileMetadata, String if (dataset.getGuestbook() != null && !dataset.getGuestbook().getCustomQuestions().isEmpty()) { initCustomQuestions(guestbookResponse, dataset); } - guestbookResponse.setDownloadtype("Download"); + guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); if(downloadFormat.toLowerCase().equals("subset")){ - guestbookResponse.setDownloadtype("Subset"); + guestbookResponse.setEventType(GuestbookResponse.SUBSET); } if(downloadFormat.toLowerCase().equals("explore")){ /** @@ -734,12 +735,12 @@ public GuestbookResponse initGuestbookResponse(FileMetadata fileMetadata, String * "externalTool" for all external tools, including TwoRavens. When * clicking "Explore" and then the name of the tool, we want the * name of the exploration tool (i.e. "Data Explorer", - * etc.) to be persisted as the downloadType. We execute - * guestbookResponse.setDownloadtype(externalTool.getDisplayName()) + * etc.) to be persisted as the eventType. 
We execute + * guestbookResponse.setEventType(externalTool.getDisplayName()) * over in the "explore" method of FileDownloadServiceBean just * before the guestbookResponse is written. */ - guestbookResponse.setDownloadtype("Explore"); + guestbookResponse.setEventType(GuestbookResponse.EXPLORE); } guestbookResponse.setDataset(dataset); @@ -818,7 +819,7 @@ public GuestbookResponse initDefaultGuestbookResponse(Dataset dataset, DataFile guestbookResponse.setDataset(dataset); guestbookResponse.setResponseTime(new Date()); guestbookResponse.setSessionId(session.toString()); - guestbookResponse.setDownloadtype("Download"); + guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); setUserDefaultResponses(guestbookResponse, session); return guestbookResponse; } @@ -839,7 +840,7 @@ public GuestbookResponse initAPIGuestbookResponse(Dataset dataset, DataFile data guestbookResponse.setDataset(dataset); guestbookResponse.setResponseTime(new Date()); guestbookResponse.setSessionId(session.toString()); - guestbookResponse.setDownloadtype("Download"); + guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); setUserDefaultResponses(guestbookResponse, session, user); return guestbookResponse; } @@ -903,29 +904,36 @@ public void save(GuestbookResponse guestbookResponse) { em.persist(guestbookResponse); } + + /* + * Metrics - download counts from GuestbookResponses: Any GuestbookResponse that + * is not of eventtype=='AccessRequest' is considered a download. This includes + * actual 'Download's, downloads of 'Subset's, and use by 'Explore' tools and + * previewers (where eventtype is the previewer name) + */ - public Long getCountGuestbookResponsesByDataFileId(Long dataFileId) { + public Long getDownloadCountByDataFileId(Long dataFileId) { // datafile id is null, will return 0 - Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.datafile_id = " + dataFileId); + Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.datafile_id = " + dataFileId + " and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); return (Long) query.getSingleResult(); } - public Long getCountGuestbookResponsesByDatasetId(Long datasetId) { - return getCountGuestbookResponsesByDatasetId(datasetId, null); + public Long getDownloadCountByDatasetId(Long datasetId) { + return getDownloadCountByDatasetId(datasetId, null); } - public Long getCountGuestbookResponsesByDatasetId(Long datasetId, LocalDate date) { + public Long getDownloadCountByDatasetId(Long datasetId, LocalDate date) { // dataset id is null, will return 0 Query query; if(date != null) { - query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId + " and responsetime < '" + date.toString() + "'"); + query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId + " and responsetime < '" + date.toString() + "' and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); }else { - query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId); + query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId+ " and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); } return (Long) query.getSingleResult(); } - public Long getCountOfAllGuestbookResponses() { + public Long getTotalDownloadCount() { // dataset id is null, will return 0 // "SELECT COUNT(*)" is notoriously expensive in PostgresQL for large @@ 
-954,10 +962,12 @@ public Long getCountOfAllGuestbookResponses() { } catch (IllegalArgumentException iae) { // Don't do anything, we'll fall back to using "SELECT COUNT()" } - Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o;"); + Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"';"); return (Long) query.getSingleResult(); } + //End Metrics/download counts + public List findByAuthenticatedUserId(AuthenticatedUser user) { Query query = em.createNamedQuery("GuestbookResponse.findByAuthenticatedUserId"); query.setParameter("authenticatedUserId", user.getId()); diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java index 6dbba34920b..0b66b652e0c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java @@ -30,6 +30,8 @@ import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; import jakarta.inject.Named; +import java.util.HashMap; +import java.util.Map; import org.apache.commons.lang3.StringUtils; /** @@ -430,44 +432,92 @@ public boolean isSessionUserAuthenticated() { return false; } + // The numbers of datasets and deleted/exported records below are used + // in rendering rules on the page. They absolutely need to be cached + // on the first lookup. + + Map cachedSetInfoNumDatasets = new HashMap<>(); + public int getSetInfoNumOfDatasets(OAISet oaiSet) { if (oaiSet.isDefaultSet()) { return getSetInfoNumOfExported(oaiSet); } + if (cachedSetInfoNumDatasets.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumDatasets.get(oaiSet.getSpec()); + } + String query = oaiSet.getDefinition(); try { int num = oaiSetService.validateDefinitionQuery(query); if (num > -1) { + cachedSetInfoNumDatasets.put(oaiSet.getSpec(), num); return num; } } catch (OaiSetException ose) { - // do notghin - will return zero. + // do nothing - will return zero. 
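// Aside (hedged): the per-spec maps used in these lookups are a hand-rolled memoization. The same
// shape could be written with Map.computeIfAbsent, for example
//
//     int num = cachedSetInfoNumDatasets.computeIfAbsent(oaiSet.getSpec(),
//             spec -> lookupNumOfDatasets(spec));   // lookupNumOfDatasets is a hypothetical helper
//
// The explicit get/put form used here keeps the zero/error path easy to follow.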
} + cachedSetInfoNumDatasets.put(oaiSet.getSpec(), 0); return 0; } + Map cachedSetInfoNumExported = new HashMap<>(); + Integer defaultSetNumExported = null; + public int getSetInfoNumOfExported(OAISet oaiSet) { + if (oaiSet.isDefaultSet() && defaultSetNumExported != null) { + return defaultSetNumExported; + } else if (cachedSetInfoNumExported.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumExported.get(oaiSet.getSpec()); + } + List records = oaiRecordService.findActiveOaiRecordsBySetName(oaiSet.getSpec()); + int num; + if (records == null || records.isEmpty()) { - return 0; + num = 0; + } else { + num = records.size(); } - return records.size(); + if (oaiSet.isDefaultSet()) { + defaultSetNumExported = num; + } else { + cachedSetInfoNumExported.put(oaiSet.getSpec(), num); + } + return num; } + Map cachedSetInfoNumDeleted = new HashMap<>(); + Integer defaultSetNumDeleted = null; + public int getSetInfoNumOfDeleted(OAISet oaiSet) { + if (oaiSet.isDefaultSet() && defaultSetNumDeleted != null) { + return defaultSetNumDeleted; + } else if (cachedSetInfoNumDeleted.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumDeleted.get(oaiSet.getSpec()); + } + List records = oaiRecordService.findDeletedOaiRecordsBySetName(oaiSet.getSpec()); + int num; + if (records == null || records.isEmpty()) { - return 0; + num = 0; + } else { + num = records.size(); } - return records.size(); + if (oaiSet.isDefaultSet()) { + defaultSetNumDeleted = num; + } else { + cachedSetInfoNumDeleted.put(oaiSet.getSpec(), num); + } + return num; } public void validateSetQuery() { diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index f17732df7b6..4b591d240bd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -382,12 +382,21 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio logger.fine(dataverseCreatedMessage); return messageText += dataverseCreatedMessage; case REQUESTFILEACCESS: + //Notification to those who can grant file access requests on a dataset when a user makes a request DataFile datafile = (DataFile) targetObject; + pattern = BundleUtil.getStringFromBundle("notification.email.requestFileAccess"); String requestorName = (requestor.getLastName() != null && requestor.getLastName() != null) ? requestor.getFirstName() + " " + requestor.getLastName() : BundleUtil.getStringFromBundle("notification.email.info.unavailable"); String requestorEmail = requestor.getEmail() != null ? 
requestor.getEmail() : BundleUtil.getStringFromBundle("notification.email.info.unavailable"); String[] paramArrayRequestFileAccess = {datafile.getOwner().getDisplayName(), requestorName, requestorEmail, getDatasetManageFileAccessLink(datafile)}; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); messageText += MessageFormat.format(pattern, paramArrayRequestFileAccess); + FileAccessRequest far = datafile.getAccessRequestForAssignee(requestor); + GuestbookResponse gbr = far.getGuestbookResponse(); + if (gbr != null) { + messageText += MessageFormat.format( + BundleUtil.getStringFromBundle("notification.email.requestFileAccess.guestbookResponse"), gbr.toHtmlFormattedResponse()); + } return messageText; case GRANTFILEACCESS: dataset = (Dataset) targetObject; @@ -457,18 +466,24 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio case RETURNEDDS: version = (DatasetVersion) targetObject; pattern = BundleUtil.getStringFromBundle("notification.email.wasReturnedByReviewer"); - String optionalReturnReason = ""; - /* - FIXME - Setting up to add single comment when design completed - optionalReturnReason = "."; - if (comment != null && !comment.isEmpty()) { - optionalReturnReason = ".\n\n" + BundleUtil.getStringFromBundle("wasReturnedReason") + "\n\n" + comment; - } - */ + String[] paramArrayReturnedDataset = {version.getDataset().getDisplayName(), getDatasetDraftLink(version.getDataset()), - version.getDataset().getOwner().getDisplayName(), getDataverseLink(version.getDataset().getOwner()), optionalReturnReason}; + version.getDataset().getOwner().getDisplayName(), getDataverseLink(version.getDataset().getOwner())}; messageText += MessageFormat.format(pattern, paramArrayReturnedDataset); + + if (comment != null && !comment.isEmpty()) { + messageText += "\n\n" + MessageFormat.format(BundleUtil.getStringFromBundle("notification.email.wasReturnedByReviewerReason"), comment); + } + + Dataverse d = (Dataverse) version.getDataset().getOwner(); + List contactEmailList = new ArrayList(); + for (DataverseContact dc : d.getDataverseContacts()) { + contactEmailList.add(dc.getContactEmail()); + } + if (!contactEmailList.isEmpty()) { + String contactEmails = String.join(", ", contactEmailList); + messageText += "\n\n" + MessageFormat.format(BundleUtil.getStringFromBundle("notification.email.wasReturnedByReviewer.collectionContacts"), contactEmails); + } return messageText; case WORKFLOW_SUCCESS: @@ -630,6 +645,20 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio dataset.getDisplayName()}; messageText = MessageFormat.format(pattern, paramArrayDatasetMentioned); return messageText; + case REQUESTEDFILEACCESS: + //Notification to requestor when they make a request + datafile = (DataFile) targetObject; + + pattern = BundleUtil.getStringFromBundle("notification.email.requestedFileAccess"); + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + messageText += MessageFormat.format(pattern, getDvObjectLink(datafile), datafile.getOwner().getDisplayName()); + far = datafile.getAccessRequestForAssignee(requestor); + gbr = far.getGuestbookResponse(); + if (gbr != null) { + messageText += MessageFormat.format( + BundleUtil.getStringFromBundle("notification.email.requestFileAccess.guestbookResponse"), gbr.toHtmlFormattedResponse()); + } + return messageText; } return ""; @@ -650,6 +679,7 @@ public Object getObjectOfNotification (UserNotification userNotification){ case CREATEDV: return 
dataverseService.find(userNotification.getObjectId()); case REQUESTFILEACCESS: + case REQUESTEDFILEACCESS: return dataFileService.find(userNotification.getObjectId()); case GRANTFILEACCESS: case REJECTFILEACCESS: diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java index 1b4af29c915..ca2f6145cba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java @@ -27,6 +27,8 @@ import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; + import jakarta.ejb.EJB; import jakarta.faces.application.FacesMessage; import jakarta.faces.event.ActionEvent; @@ -71,6 +73,8 @@ public class ManageFilePermissionsPage implements java.io.Serializable { DataverseRequestServiceBean dvRequestService; @Inject PermissionsWrapper permissionsWrapper; + @EJB + FileAccessRequestServiceBean fileAccessRequestService; @PersistenceContext(unitName = "VDCNet-ejbPU") EntityManager em; @@ -85,6 +89,15 @@ public class ManageFilePermissionsPage implements java.io.Serializable { public TreeMap> getFileAccessRequestMap() { return fileAccessRequestMap; } + + public List getDataFilesForRequestor() { + List fars = fileAccessRequestMap.get(getFileRequester()); + if (fars == null) { + return new ArrayList<>(); + } else { + return fars.stream().map(FileAccessRequest::getDataFile).collect(Collectors.toList()); + } + } private final TreeMap> fileAccessRequestMap = new TreeMap<>(); private boolean showDeleted = true; @@ -177,14 +190,14 @@ private void initMaps() { fileMap.put(file, raList); // populate the file access requests map - for (FileAccessRequest fileAccessRequest : file.getFileAccessRequests()) { - List requestedFiles = fileAccessRequestMap.get(fileAccessRequest.getAuthenticatedUser()); - if (requestedFiles == null) { - requestedFiles = new ArrayList<>(); - AuthenticatedUser withProvider = authenticationService.getAuthenticatedUserWithProvider(fileAccessRequest.getAuthenticatedUser().getUserIdentifier()); - fileAccessRequestMap.put(withProvider, requestedFiles); + for (FileAccessRequest fileAccessRequest : file.getFileAccessRequests(FileAccessRequest.RequestState.CREATED)) { + List fileAccessRequestList = fileAccessRequestMap.get(fileAccessRequest.getRequester()); + if (fileAccessRequestList == null) { + fileAccessRequestList = new ArrayList<>(); + AuthenticatedUser withProvider = authenticationService.getAuthenticatedUserWithProvider(fileAccessRequest.getRequester().getUserIdentifier()); + fileAccessRequestMap.put(withProvider, fileAccessRequestList); } - requestedFiles.add(fileAccessRequest); + fileAccessRequestList.add(fileAccessRequest); } } } @@ -406,16 +419,19 @@ public void grantAccess(ActionEvent evt) { if (file.isReleased()) { sendNotification = true; } - // remove request, if it exist - if (file.removeFileAccessRequester(roleAssignee)) { - datafileService.save(file); + // set request(s) granted, if they exist + for (AuthenticatedUser au : roleAssigneeService.getExplicitUsers(roleAssignee)) { + FileAccessRequest far = file.getAccessRequestForAssignee(au); + far.setStateGranted(); } + datafileService.save(file); } + } if (sendNotification) { for (AuthenticatedUser au : roleAssigneeService.getExplicitUsers(roleAssignee)) { - userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.GRANTFILEACCESS, dataset.getId()); + 
userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.GRANTFILEACCESS, dataset.getId()); } } } @@ -443,7 +459,9 @@ private void grantAccessToRequests(AuthenticatedUser au, List files) { DataverseRole fileDownloaderRole = roleService.findBuiltinRoleByAlias(DataverseRole.FILE_DOWNLOADER); for (DataFile file : files) { if (assignRole(au, file, fileDownloaderRole)) { - if (file.removeFileAccessRequester(au)) { + FileAccessRequest far = file.getAccessRequestForAssignee(au); + if (far!=null) { + far.setStateGranted(); datafileService.save(file); } actionPerformed = true; @@ -475,9 +493,14 @@ public void rejectAccessToAllRequests(AuthenticatedUser au) { private void rejectAccessToRequests(AuthenticatedUser au, List files) { boolean actionPerformed = false; for (DataFile file : files) { - file.removeFileAccessRequester(au); - datafileService.save(file); - actionPerformed = true; + FileAccessRequest far = file.getAccessRequestForAssignee(au); + if(far!=null) { + far.setStateRejected(); + fileAccessRequestService.save(far); + file.removeFileAccessRequest(far); + datafileService.save(file); + actionPerformed = true; + } } if (actionPerformed) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java index bf78b9d088f..0e277c5aa32 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java @@ -55,6 +55,8 @@ public class ManagePermissionsPage implements java.io.Serializable { @EJB DvObjectServiceBean dvObjectService; @EJB + FileAccessRequestServiceBean fileAccessRequestService; + @EJB DataverseRoleServiceBean roleService; @EJB RoleAssigneeServiceBean roleAssigneeService; diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index a1de33a764e..8fb762e3e5b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -41,6 +41,9 @@ import java.util.stream.Collectors; import static java.util.stream.Collectors.toList; import jakarta.persistence.Query; +import jakarta.persistence.criteria.CriteriaBuilder; +import jakarta.persistence.criteria.CriteriaQuery; +import jakarta.persistence.criteria.Root; /** * Your one-stop-shop for deciding which user can do what action on which @@ -448,8 +451,9 @@ private boolean isPublicallyDownloadable(DvObject dvo) { if (!df.isRestricted()) { if (df.getOwner().getReleasedVersion() != null) { - if (df.getOwner().getReleasedVersion().getFileMetadatas() != null) { - for (FileMetadata fm : df.getOwner().getReleasedVersion().getFileMetadatas()) { + List fileMetadatas = df.getOwner().getReleasedVersion().getFileMetadatas(); + if (fileMetadatas != null) { + for (FileMetadata fm : fileMetadatas) { if (df.equals(fm.getDataFile())) { return true; } @@ -837,4 +841,57 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio return false; } + /** + * Checks if a DataverseRequest can download at least one file of the target DatasetVersion. 
+ * + * @param dataverseRequest DataverseRequest to check + * @param datasetVersion DatasetVersion to check + * @return boolean indicating whether the user can download at least one file or not + */ + public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, DatasetVersion datasetVersion) { + if (hasUnrestrictedReleasedFiles(datasetVersion)) { + return true; + } + List fileMetadatas = datasetVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : fileMetadatas) { + DataFile dataFile = fileMetadata.getDataFile(); + Set roleAssignees = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile)); + roleAssignees.add(dataverseRequest.getUser()); + if (hasGroupPermissionsFor(roleAssignees, dataFile, EnumSet.of(Permission.DownloadFile))) { + return true; + } + } + return false; + } + + /** + * Checks if a DatasetVersion has unrestricted released files. + * + * This method is mostly based on {@link #isPublicallyDownloadable(DvObject)} although in this case, instead of basing + * the search on a particular file, it searches for the total number of files in the target version that are present + * in the released version. + * + * @param targetDatasetVersion DatasetVersion to check + * @return boolean indicating whether the dataset version has released files or not + */ + private boolean hasUnrestrictedReleasedFiles(DatasetVersion targetDatasetVersion) { + Dataset targetDataset = targetDatasetVersion.getDataset(); + if (!targetDataset.isReleased()) { + return false; + } + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(Long.class); + Root datasetVersionRoot = criteriaQuery.from(DatasetVersion.class); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + criteriaQuery + .select(criteriaBuilder.count(fileMetadataRoot)) + .where(criteriaBuilder.and( + criteriaBuilder.equal(fileMetadataRoot.get("dataFile").get("restricted"), false), + criteriaBuilder.equal(datasetVersionRoot.get("dataset"), targetDataset), + criteriaBuilder.equal(datasetVersionRoot.get("versionState"), DatasetVersion.VersionState.RELEASED), + fileMetadataRoot.in(targetDatasetVersion.getFileMetadatas()), + fileMetadataRoot.in(datasetVersionRoot.get("fileMetadatas")))); + Long result = em.createQuery(criteriaQuery).getSingleResult(); + return result > 0; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java index 059d5a8ffd3..88acc1916cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.authorization.groups.impl.explicit.ExplicitGroupServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.GuestUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.mydata.MyDataFilterParams; import edu.harvard.iq.dataverse.privateurl.PrivateUrlUtil; @@ -96,18 +97,18 @@ public RoleAssignee getRoleAssignee(String identifier, Boolean augmented) { if (identifier == null || identifier.isEmpty()) { throw new IllegalArgumentException("Identifier cannot be null or empty string."); } - switch (identifier.charAt(0)) { - case ':': + switch (identifier.substring(0,1)) { + case ":": return 
predefinedRoleAssignees.get(identifier); - case '@': + case AuthenticatedUser.IDENTIFIER_PREFIX: if (!augmented){ return authSvc.getAuthenticatedUser(identifier.substring(1)); } else { return authSvc.getAuthenticatedUserWithProvider(identifier.substring(1)); - } - case '&': + } + case Group.IDENTIFIER_PREFIX: return groupSvc.getGroup(identifier.substring(1)); - case '#': + case PrivateUrlUser.PREFIX: return PrivateUrlUtil.identifier2roleAssignee(identifier); default: throw new IllegalArgumentException("Unsupported assignee identifier '" + identifier + "'"); diff --git a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java index 71318a0184a..a387b27d98b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java @@ -17,6 +17,7 @@ import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.api.AbstractApiBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import java.io.BufferedReader; @@ -203,35 +204,21 @@ public DataFile createPackageDataFile(Dataset dataset, String folderName, long t fmd.setDatasetVersion(dataset.getLatestVersion()); FileUtil.generateS3PackageStorageIdentifier(packageFile); - - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(packageFile.getProtocol(), commandEngine.getContext()); + PidProvider pidProvider = commandEngine.getContext().dvObjects().getEffectivePidGenerator(dataset); if (packageFile.getIdentifier() == null || packageFile.getIdentifier().isEmpty()) { - String packageIdentifier = idServiceBean.generateDataFileIdentifier(packageFile); - packageFile.setIdentifier(packageIdentifier); - } - - String nonNullDefaultIfKeyNotFound = ""; - String protocol = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - String authority = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - - if (packageFile.getProtocol() == null) { - packageFile.setProtocol(protocol); - } - if (packageFile.getAuthority() == null) { - packageFile.setAuthority(authority); + pidProvider.generatePid(packageFile); } if (!packageFile.isIdentifierRegistered()) { String doiRetString = ""; - idServiceBean = GlobalIdServiceBean.getBean(commandEngine.getContext()); try { - doiRetString = idServiceBean.createIdentifier(packageFile); + doiRetString = pidProvider.createIdentifier(packageFile); } catch (Throwable e) { } // Check return value to make sure registration succeeded - if (!idServiceBean.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) { + if (!pidProvider.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) { packageFile.setIdentifierRegistered(true); packageFile.setGlobalIdCreateTime(new Date()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java index 6be768321c4..68912969003 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java +++ b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import 
edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.util.Optional; @@ -217,6 +218,7 @@ public String sendMessage() { } logger.fine("sending feedback: " + feedback); mailService.sendMail(feedback.getFromEmail(), feedback.getToEmail(), feedback.getCcEmail(), feedback.getSubject(), feedback.getBody()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("contact.sent")); return null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 307301049f0..964c58b75f6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -6,6 +6,9 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -24,6 +27,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.logging.Logger; import java.util.Set; @@ -332,12 +336,29 @@ public boolean isGlobusFileDownload() { } public boolean isGlobusEnabledStorageDriver(String driverId) { - if (globusStoreList == null) { - globusStoreList = systemConfig.getGlobusStoresList(); + return (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId)); + } + + public boolean isDownloadable(FileMetadata fmd) { + boolean downloadable=true; + if(isGlobusFileDownload()) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + + downloadable = downloadable && !AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(driverId); } - return globusStoreList.contains(driverId); + return downloadable; } + public boolean isGlobusTransferable(FileMetadata fmd) { + boolean globusTransferable=true; + if(isGlobusFileDownload()) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId); + } + return globusTransferable; + } + + public String getGlobusAppUrl() { if (globusAppUrl == null) { globusAppUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost"); @@ -379,13 +400,6 @@ public boolean isHTTPUpload(){ return httpUpload; } - public boolean isDataFilePIDSequentialDependent(){ - if (dataFilePIDSequentialDependent == null) { - dataFilePIDSequentialDependent = systemConfig.isDataFilePIDSequentialDependent(); - } - return dataFilePIDSequentialDependent; - } - public String getSupportTeamName() { String systemEmail = getValueForKey(SettingsServiceBean.Key.SystemEmail); InternetAddress systemAddress = MailUtil.parseSystemAddress(systemEmail); @@ -449,23 +463,6 @@ public Map getConfiguredLocales() { return configuredLocales; } - public boolean isDoiInstallation() { - String protocol = getValueForKey(SettingsServiceBean.Key.Protocol); - if ("doi".equals(protocol)) { - return true; - } else { - return false; - } - } - - public boolean 
isDataCiteInstallation() { - String protocol = getValueForKey(SettingsServiceBean.Key.DoiProvider); - if ("DataCite".equals(protocol)) { - return true; - } else { - return false; - } - } public boolean isMakeDataCountDisplayEnabled() { boolean safeDefaultIfKeyNotFound = (getValueForKey(SettingsServiceBean.Key.MDCLogPath)!=null); //Backward compatible @@ -594,7 +591,7 @@ public Map getBaseMetadataLanguageMap(boolean refresh) { public Map getMetadataLanguages(DvObjectContainer target) { Map currentMap = new HashMap(); currentMap.putAll(getBaseMetadataLanguageMap(false)); - currentMap.put(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE, getDefaultMetadataLanguageLabel(target)); + currentMap.put(DvObjectContainer.UNDEFINED_CODE, getDefaultMetadataLanguageLabel(target)); return currentMap; } @@ -605,7 +602,7 @@ private String getDefaultMetadataLanguageLabel(DvObjectContainer target) { String mlCode = target.getOwner().getEffectiveMetadataLanguage(); // If it's undefined, no parent has a metadata language defined, and the global default should be used. - if (!mlCode.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) { + if (!mlCode.equals(DvObjectContainer.UNDEFINED_CODE)) { // Get the label for the language code found mlLabel = getBaseMetadataLanguageMap(false).get(mlCode); mlLabel = mlLabel + " " + BundleUtil.getStringFromBundle("dataverse.inherited"); @@ -623,7 +620,7 @@ public String getDefaultMetadataLanguage() { return (String) mdMap.keySet().toArray()[0]; } else { //More than one - :MetadataLanguages is set and the default is undefined (users must choose if the collection doesn't override the default) - return DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE; + return DvObjectContainer.UNDEFINED_CODE; } } else { // None - :MetadataLanguages is not set so return null to turn off the display (backward compatibility) @@ -631,6 +628,32 @@ public String getDefaultMetadataLanguage() { } } + public Map getGuestbookEntryOptions(DvObjectContainer target) { + Map currentMap = new HashMap(); + String atDownload = BundleUtil.getStringFromBundle("dataverse.guestbookentry.atdownload"); + String atRequest = BundleUtil.getStringFromBundle("dataverse.guestbookentry.atrequest"); + Optional gbDefault = JvmSettings.GUESTBOOK_AT_REQUEST.lookupOptional(Boolean.class); + if (gbDefault.isPresent()) { + // Three options - inherited/default option, at Download, at Request + String useDefault = null; + if (target.getOwner() == null) { + boolean defaultOption = gbDefault.get(); + useDefault = (defaultOption ? atRequest : atDownload) + + BundleUtil.getStringFromBundle("dataverse.default"); + } else { + boolean defaultOption = target.getOwner().getEffectiveGuestbookEntryAtRequest(); + useDefault = (defaultOption ? 
atRequest : atDownload) + + BundleUtil.getStringFromBundle("dataverse.inherited"); + } + currentMap.put(DvObjectContainer.UNDEFINED_CODE, useDefault); + currentMap.put(Boolean.toString(true), atRequest); + currentMap.put(Boolean.toString(false), atDownload); + } else { + // Setting not defined - leave empty + } + return currentMap; + } + public Dataverse getRootDataverse() { if (rootDataverse == null) { rootDataverse = dataverseService.findRootDataverse(); diff --git a/src/main/java/edu/harvard/iq/dataverse/Shib.java b/src/main/java/edu/harvard/iq/dataverse/Shib.java index bee1182e248..24c0f9d7926 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Shib.java +++ b/src/main/java/edu/harvard/iq/dataverse/Shib.java @@ -24,6 +24,7 @@ import java.util.Arrays; import java.util.Date; import java.util.List; +import java.util.logging.Level; import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.ejb.EJBException; @@ -62,7 +63,7 @@ public class Shib implements java.io.Serializable { HttpServletRequest request; private String userPersistentId; - private String internalUserIdentifer; + private String internalUserIdentifier; AuthenticatedUserDisplayInfo displayInfo; /** * @todo Remove this boolean some day? Now the mockups show a popup. Should @@ -210,8 +211,8 @@ public void init() { } String usernameAssertion = getValueFromAssertion(ShibUtil.usernameAttribute); - internalUserIdentifer = ShibUtil.generateFriendlyLookingUserIdentifer(usernameAssertion, emailAddress); - logger.fine("friendly looking identifer (backend will enforce uniqueness):" + internalUserIdentifer); + internalUserIdentifier = ShibUtil.generateFriendlyLookingUserIdentifier(usernameAssertion, emailAddress); + logger.log(Level.FINE, "friendly looking identifier (backend will enforce uniqueness): {0}", internalUserIdentifier); String shibAffiliationAttribute = settingsService.getValueForKey(SettingsServiceBean.Key.ShibAffiliationAttribute); String affiliation = (StringUtils.isNotBlank(shibAffiliationAttribute)) @@ -326,7 +327,7 @@ public String confirmAndCreateAccount() { AuthenticatedUser au = null; try { au = authSvc.createAuthenticatedUser( - new UserRecordIdentifier(shibAuthProvider.getId(), lookupStringPerAuthProvider), internalUserIdentifer, displayInfo, true); + new UserRecordIdentifier(shibAuthProvider.getId(), lookupStringPerAuthProvider), internalUserIdentifier, displayInfo, true); } catch (EJBException ex) { /** * @todo Show the ConstraintViolationException, if any. 
@@ -354,7 +355,7 @@ public String confirmAndConvertAccount() { visibleTermsOfUse = false; ShibAuthenticationProvider shibAuthProvider = new ShibAuthenticationProvider(); String lookupStringPerAuthProvider = userPersistentId; - UserIdentifier userIdentifier = new UserIdentifier(lookupStringPerAuthProvider, internalUserIdentifer); + UserIdentifier userIdentifier = new UserIdentifier(lookupStringPerAuthProvider, internalUserIdentifier); logger.fine("builtin username: " + builtinUsername); AuthenticatedUser builtInUserToConvert = authSvc.canLogInAsBuiltinUser(builtinUsername, builtinPassword); if (builtInUserToConvert != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/TemplatePage.java b/src/main/java/edu/harvard/iq/dataverse/TemplatePage.java index fff520fd259..44070dcbb41 100644 --- a/src/main/java/edu/harvard/iq/dataverse/TemplatePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/TemplatePage.java @@ -148,6 +148,7 @@ public String init() { editMode = TemplatePage.EditMode.CREATE; template = new Template(this.dataverse, settingsWrapper.getSystemMetadataBlocks()); TermsOfUseAndAccess terms = new TermsOfUseAndAccess(); + terms.setFileAccessRequest(true); terms.setTemplate(template); terms.setLicense(licenseServiceBean.getDefault()); template.setTermsOfUseAndAccess(terms); diff --git a/src/main/java/edu/harvard/iq/dataverse/ThemeWidgetFragment.java b/src/main/java/edu/harvard/iq/dataverse/ThemeWidgetFragment.java index 9a62a99722a..f30051e26ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThemeWidgetFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThemeWidgetFragment.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseThemeCommand; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import java.io.File; @@ -14,6 +15,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import java.util.logging.Level; @@ -49,6 +51,8 @@ public class ThemeWidgetFragment implements java.io.Serializable { static final String DEFAULT_TEXT_COLOR = "888888"; private static final Logger logger = Logger.getLogger(ThemeWidgetFragment.class.getCanonicalName()); + public static final String LOGOS_SUBDIR = "logos"; + public static final String LOGOS_TEMP_SUBDIR = LOGOS_SUBDIR + File.separator + "temp"; private File tempDir; private File uploadedFile; @@ -86,12 +90,18 @@ public void setTaglineInput(HtmlInputText taglineInput) { } - + public static Path getLogoDir(String ownerId) { + return Path.of(JvmSettings.DOCROOT_DIRECTORY.lookup(), LOGOS_SUBDIR, ownerId); + } - private void createTempDir() { + private void createTempDir() { try { - File tempRoot = Files.createDirectories(Paths.get("../docroot/logos/temp")).toFile(); - tempDir = Files.createTempDirectory(tempRoot.toPath(),editDv.getId().toString()).toFile(); + // Create the temporary space if not yet existing (will silently ignore preexisting) + // Note that the docroot directory is checked within ConfigCheckService for presence and write access. 
+ Path tempRoot = Path.of(JvmSettings.DOCROOT_DIRECTORY.lookup(), LOGOS_TEMP_SUBDIR); + Files.createDirectories(tempRoot); + + this.tempDir = Files.createTempDirectory(tempRoot, editDv.getId().toString()).toFile(); } catch (IOException e) { throw new RuntimeException("Error creating temp directory", e); // improve error handling } diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 319ae06eefb..b6ab23848e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -6,24 +6,21 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; - -import static edu.harvard.iq.dataverse.dataset.DatasetUtil.datasetLogoThumbnail; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.search.SolrSearchResult; -import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; -import java.io.InputStream; -import java.util.Base64; import java.util.HashMap; import java.util.Map; +import java.util.logging.Logger; + import jakarta.ejb.EJB; import jakarta.enterprise.context.RequestScoped; -import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; import jakarta.inject.Named; -import org.apache.commons.io.IOUtils; /** * @@ -33,6 +30,9 @@ @RequestScoped @Named public class ThumbnailServiceWrapper implements java.io.Serializable { + + private static final Logger logger = Logger.getLogger(ThumbnailServiceWrapper.class.getCanonicalName()); + @Inject PermissionsWrapper permissionsWrapper; @EJB @@ -46,49 +46,7 @@ public class ThumbnailServiceWrapper implements java.io.Serializable { private Map dvobjectThumbnailsMap = new HashMap<>(); private Map dvobjectViewMap = new HashMap<>(); - - private String getAssignedDatasetImage(Dataset dataset, int size) { - if (dataset == null) { - return null; - } - - DataFile assignedThumbnailFile = dataset.getThumbnailFile(); - - if (assignedThumbnailFile != null) { - Long assignedThumbnailFileId = assignedThumbnailFile.getId(); - - if (this.dvobjectThumbnailsMap.containsKey(assignedThumbnailFileId)) { - // Yes, return previous answer - //logger.info("using cached result for ... "+assignedThumbnailFileId); - if (!"".equals(this.dvobjectThumbnailsMap.get(assignedThumbnailFileId))) { - return this.dvobjectThumbnailsMap.get(assignedThumbnailFileId); - } - return null; - } - - String imageSourceBase64 = ImageThumbConverter.getImageThumbnailAsBase64(assignedThumbnailFile, - size); - //ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); - - if (imageSourceBase64 != null) { - this.dvobjectThumbnailsMap.put(assignedThumbnailFileId, imageSourceBase64); - return imageSourceBase64; - } - - // OK - we can't use this "assigned" image, because of permissions, or because - // the thumbnail failed to generate, etc... in this case we'll - // mark this dataset in the lookup map - so that we don't have to - // do all these lookups again... - this.dvobjectThumbnailsMap.put(assignedThumbnailFileId, ""); - - // TODO: (?) - // do we need to cache this datafile object in the view map? 
- // -- L.A., 4.2.2 - } - - return null; - - } + private Map hasThumbMap = new HashMap<>(); // it's the responsibility of the user - to make sure the search result // passed to this method is of the Datafile type! @@ -130,7 +88,7 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) { if ((!((DataFile)result.getEntity()).isRestricted() || permissionsWrapper.hasDownloadFilePermission(result.getEntity())) - && dataFileService.isThumbnailAvailable((DataFile) result.getEntity())) { + && isThumbnailAvailable((DataFile) result.getEntity())) { cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( (DataFile) result.getEntity(), @@ -156,9 +114,16 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) { return null; } + public boolean isThumbnailAvailable(DataFile entity) { + if(!hasThumbMap.containsKey(entity.getId())) { + hasThumbMap.put(entity.getId(), dataFileService.isThumbnailAvailable(entity)); + } + return hasThumbMap.get(entity.getId()); + } + // it's the responsibility of the user - to make sure the search result // passed to this method is of the Dataset type! - public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { + public String getDatasetCardImageAsUrl(SolrSearchResult result) { // Before we do anything else, check if it's a harvested dataset; // no need to check anything else if so (harvested datasets never have // thumbnails) @@ -176,13 +141,14 @@ public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { return null; } Dataset dataset = (Dataset)result.getEntity(); + dataset.setId(result.getEntityId()); Long versionId = result.getDatasetVersionId(); - return getDatasetCardImageAsBase64Url(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); + return getDatasetCardImageAsUrl(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); } - public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, boolean autoselect, int size) { + public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean autoselect, int size) { Long datasetId = dataset.getId(); if (datasetId != null) { if (this.dvobjectThumbnailsMap.containsKey(datasetId)) { @@ -203,112 +169,34 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo this.dvobjectThumbnailsMap.put(datasetId, ""); return null; } - - String cardImageUrl = null; - StorageIO dataAccess = null; - - try{ - dataAccess = DataAccess.getStorageIO(dataset); - } - catch(IOException ioex){ - // ignore - } - - InputStream in = null; - // See if the dataset already has a dedicated thumbnail ("logo") saved as - // an auxilary file on the dataset level: - // (don't bother checking if it exists; just try to open the input stream) - try { - in = dataAccess.getAuxFileAsInputStream(datasetLogoThumbnail + ".thumb" + size); - //thumb48addedByImageThumbConverter); - } catch (Exception ioex) { - //ignore - } - - if (in != null) { - try { - byte[] bytes = IOUtils.toByteArray(in); - String base64image = Base64.getEncoder().encodeToString(bytes); - cardImageUrl = FileUtil.DATA_URI_SCHEME + base64image; - this.dvobjectThumbnailsMap.put(datasetId, cardImageUrl); - return cardImageUrl; - } catch (IOException ex) { - this.dvobjectThumbnailsMap.put(datasetId, ""); - return null; - // (alternatively, we could ignore the exception, and proceed with the - // regular process of selecting the thumbnail from the available - // image files - ?) 
- } finally - { - IOUtils.closeQuietly(in); - } - } - - // If not, see if the dataset has one of its image files already assigned - // to be the designated thumbnail: - cardImageUrl = this.getAssignedDatasetImage(dataset, size); - - if (cardImageUrl != null) { - //logger.info("dataset id " + result.getEntity().getId() + " has a dedicated image assigned; returning " + cardImageUrl); - return cardImageUrl; - } - - // And finally, try to auto-select the thumbnail (unless instructed not to): - - if (!autoselect) { - return null; - } - - // We attempt to auto-select via the optimized, native query-based method - // from the DatasetVersionService: - Long thumbnailImageFileId = datasetVersionService.getThumbnailByVersionId(versionId); - - if (thumbnailImageFileId != null) { - //cardImageUrl = FILE_CARD_IMAGE_URL + thumbnailImageFileId; - if (this.dvobjectThumbnailsMap.containsKey(thumbnailImageFileId)) { - // Yes, return previous answer - //logger.info("using cached result for ... "+datasetId); - if (!"".equals(this.dvobjectThumbnailsMap.get(thumbnailImageFileId))) { - return this.dvobjectThumbnailsMap.get(thumbnailImageFileId); - } - return null; - } + DataFile thumbnailFile = dataset.getThumbnailFile(); - DataFile thumbnailImageFile = null; + if (thumbnailFile == null) { - if (dvobjectViewMap.containsKey(thumbnailImageFileId) - && dvobjectViewMap.get(thumbnailImageFileId).isInstanceofDataFile()) { - thumbnailImageFile = (DataFile) dvobjectViewMap.get(thumbnailImageFileId); - } else { - thumbnailImageFile = dataFileService.findCheapAndEasy(thumbnailImageFileId); - if (thumbnailImageFile != null) { - // TODO: - // do we need this file on the map? - it may not even produce - // a thumbnail! - dvobjectViewMap.put(thumbnailImageFileId, thumbnailImageFile); - } else { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, ""); - return null; + boolean hasDatasetLogo = false; + StorageIO storageIO = null; + try { + storageIO = DataAccess.getStorageIO(dataset); + if (storageIO.isAuxObjectCached(DatasetUtil.datasetLogoFilenameFinal)) { + // If not, return null/use the default, otherwise pass the logo URL + hasDatasetLogo = true; } + } catch (IOException ioex) { + logger.warning("getDatasetCardImageAsUrl(): Failed to initialize dataset StorageIO for " + + dataset.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); } - - if (dataFileService.isThumbnailAvailable(thumbnailImageFile)) { - cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( - thumbnailImageFile, - size); - //ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); - } - - if (cardImageUrl != null) { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, cardImageUrl); - } else { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, ""); + // If no other logo we attempt to auto-select via the optimized, native + // query-based method + // from the DatasetVersionService: + if (!hasDatasetLogo && datasetVersionService.getThumbnailByVersionId(versionId) == null) { + return null; } } - - //logger.info("dataset id " + result.getEntityId() + ", returning " + cardImageUrl); - - return cardImageUrl; + String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; + logger.fine("getDatasetCardImageAsUrl: " + url); + this.dvobjectThumbnailsMap.put(datasetId,url); + return url; + } // it's the responsibility of the user - to make sure the search result @@ -320,6 +208,7 @@ public String getDataverseCardImageAsBase64Url(SolrSearchResult result) { public void resetObjectMaps() { dvobjectThumbnailsMap = new 
HashMap<>(); dvobjectViewMap = new HashMap<>(); + hasThumbMap = new HashMap<>(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java index a87404b69a7..280c2075494 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java @@ -39,7 +39,7 @@ public enum Type { CHECKSUMIMPORT, CHECKSUMFAIL, CONFIRMEMAIL, APIGENERATED, INGESTCOMPLETED, INGESTCOMPLETEDWITHERRORS, PUBLISHFAILED_PIDREG, WORKFLOW_SUCCESS, WORKFLOW_FAILURE, STATUSUPDATED, DATASETCREATED, DATASETMENTIONED, GLOBUSUPLOADCOMPLETED, GLOBUSUPLOADCOMPLETEDWITHERRORS, - GLOBUSDOWNLOADCOMPLETED, GLOBUSDOWNLOADCOMPLETEDWITHERRORS; + GLOBUSDOWNLOADCOMPLETED, GLOBUSDOWNLOADCOMPLETEDWITHERRORS, REQUESTEDFILEACCESS; public String getDescription() { return BundleUtil.getStringFromBundle("notification.typeDescription." + this.name()); diff --git a/src/main/java/edu/harvard/iq/dataverse/UserNotificationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/UserNotificationServiceBean.java index a2a71ff8b40..228e4b19c38 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserNotificationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserNotificationServiceBean.java @@ -131,6 +131,7 @@ public void sendNotification(AuthenticatedUser dataverseUser, Timestamp sendDate save(userNotification); } } + public boolean isEmailMuted(UserNotification userNotification) { final Type type = userNotification.getType(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 5a4c9ab9058..60e0b79662b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -1,30 +1,8 @@ package edu.harvard.iq.dataverse.api; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetLinkingDataverse; -import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersionServiceBean; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseLinkingDataverse; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseRoleServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.DvObjectServiceBean; -import edu.harvard.iq.dataverse.EjbDataverseEngine; -import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; -import edu.harvard.iq.dataverse.MetadataBlock; -import edu.harvard.iq.dataverse.MetadataBlockServiceBean; -import edu.harvard.iq.dataverse.PermissionServiceBean; -import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; -import edu.harvard.iq.dataverse.UserNotificationServiceBean; -import edu.harvard.iq.dataverse.UserServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; +import static edu.harvard.iq.dataverse.api.Datasets.handleVersion; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; import 
edu.harvard.iq.dataverse.authorization.RoleAssignee; @@ -38,44 +16,47 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; -import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean; +import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonParser; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; -import java.io.StringReader; -import java.net.URI; -import java.util.Arrays; -import java.util.Collections; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.logging.Level; -import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.ejb.EJBException; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonArrayBuilder; -import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; -import jakarta.json.JsonReader; -import jakarta.json.JsonValue; +import jakarta.json.*; import jakarta.json.JsonValue.ValueType; import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; import jakarta.persistence.PersistenceContext; import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.*; +import jakarta.ws.rs.core.Context; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response; import jakarta.ws.rs.core.Response.ResponseBuilder; import jakarta.ws.rs.core.Response.Status; +import java.io.InputStream; +import java.net.URI; +import java.util.Arrays; +import java.util.Collections; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.logging.Level; +import java.util.logging.Logger; + import static org.apache.commons.lang3.StringUtils.isNumeric; /** @@ -132,23 +113,21 @@ public Response refineResponse( String message ) { * In the common case of the wrapped response being of type JSON, * return the message field it has (if any). * @return the content of a message field, or {@code null}. + * @throws JsonException when JSON parsing fails. 
*/ String getWrappedMessageWhenJson() { if ( response.getMediaType().equals(MediaType.APPLICATION_JSON_TYPE) ) { Object entity = response.getEntity(); if ( entity == null ) return null; - String json = entity.toString(); - try ( StringReader rdr = new StringReader(json) ){ - JsonReader jrdr = Json.createReader(rdr); - JsonObject obj = jrdr.readObject(); - if ( obj.containsKey("message") ) { - JsonValue message = obj.get("message"); - return message.getValueType() == ValueType.STRING ? obj.getString("message") : message.toString(); - } else { - return null; - } + JsonObject obj = JsonUtil.getJsonObject(entity.toString()); + if ( obj.containsKey("message") ) { + JsonValue message = obj.get("message"); + return message.getValueType() == ValueType.STRING ? obj.getString("message") : message.toString(); + } else { + return null; } + } else { return null; } @@ -416,9 +395,34 @@ protected Dataset findDatasetOrDie(String id) throws WrappedResponse { } } } + + protected DatasetVersion findDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned) throws WrappedResponse { + DatasetVersion dsv = execCommand(handleVersion(versionNumber, new Datasets.DsVersionHandler>() { + + @Override + public Command handleLatest() { + return new GetLatestAccessibleDatasetVersionCommand(req, ds, includeDeaccessioned, checkPermsWhenDeaccessioned); + } + + @Override + public Command handleDraft() { + return new GetDraftDatasetVersionCommand(req, ds); + } + + @Override + public Command handleSpecific(long major, long minor) { + return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor, includeDeaccessioned, checkPermsWhenDeaccessioned); + } + + @Override + public Command handleLatestPublished() { + return new GetLatestPublishedDatasetVersionCommand(req, ds, includeDeaccessioned, checkPermsWhenDeaccessioned); + } + })); + return dsv; + } protected DataFile findDataFileOrDie(String id) throws WrappedResponse { - DataFile datafile; if (id.equals(PERSISTENT_ID_KEY)) { String persistentId = getRequestParameter(PERSISTENT_ID_KEY.substring(1)); @@ -661,7 +665,15 @@ protected Response ok( JsonArrayBuilder bld ) { .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } - + + protected Response ok( JsonArrayBuilder bld , long totalCount) { + return Response.ok(Json.createObjectBuilder() + .add("status", ApiConstants.STATUS_OK) + .add("totalCount", totalCount) + .add("data", bld).build()) + .type(MediaType.APPLICATION_JSON).build(); + } + protected Response ok( JsonArray ja ) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) @@ -708,6 +720,12 @@ protected Response ok( boolean value ) { .add("data", value).build() ).build(); } + protected Response ok(long value) { + return Response.ok().entity(Json.createObjectBuilder() + .add("status", ApiConstants.STATUS_OK) + .add("data", value).build()).build(); + } + /** * @param data Payload to return. * @param mediaType Non-JSON media type. 
@@ -722,6 +740,11 @@ protected Response ok(String data, MediaType mediaType, String downloadFilename) return res.build(); } + protected Response ok(InputStream inputStream) { + ResponseBuilder res = Response.ok().entity(inputStream).type(MediaType.valueOf(FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT)); + return res.build(); + } + protected Response created( String uri, JsonObjectBuilder bld ) { return Response.created( URI.create(uri) ) .entity( Json.createObjectBuilder() diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 0341f8c1127..297ec2d3681 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -31,6 +31,7 @@ import edu.harvard.iq.dataverse.RoleAssignment; import edu.harvard.iq.dataverse.UserNotification; import edu.harvard.iq.dataverse.UserNotificationServiceBean; +import edu.harvard.iq.dataverse.ThemeWidgetFragment; import static edu.harvard.iq.dataverse.api.Datasets.handleVersion; @@ -46,6 +47,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccessRequest; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.DataFileZipper; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.OptionalAccessService; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.datavariable.DataVariable; @@ -327,8 +329,8 @@ public Response datafile(@Context ContainerRequestContext crc, @PathParam("fileI dInfo.addServiceAvailable(new OptionalAccessService("preprocessed", "application/json", "format=prep", "Preprocessed data in JSON")); dInfo.addServiceAvailable(new OptionalAccessService("subset", "text/tab-separated-values", "variables=<LIST>", "Column-wise Subsetting")); } - - if(systemConfig.isGlobusFileDownload() && systemConfig.getGlobusStoresList().contains(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) { + String driverId = DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()); + if(systemConfig.isGlobusFileDownload() && (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId))) { dInfo.addServiceAvailable(new OptionalAccessService("GlobusTransfer", df.getContentType(), "format=GlobusTransfer", "Download via Globus")); } @@ -1196,16 +1198,7 @@ private File getLogo(Dataverse dataverse) { DataverseTheme theme = dataverse.getDataverseTheme(); if (theme != null && theme.getLogo() != null && !theme.getLogo().equals("")) { - Properties p = System.getProperties(); - String domainRoot = p.getProperty("com.sun.aas.instanceRoot"); - - if (domainRoot != null && !"".equals(domainRoot)) { - return new File (domainRoot + File.separator + - "docroot" + File.separator + - "logos" + File.separator + - dataverse.getLogoOwnerId() + File.separator + - theme.getLogo()); - } + return ThemeWidgetFragment.getLogoDir(dataverse.getLogoOwnerId()).resolve(theme.getLogo()).toFile(); } return null; @@ -1427,7 +1420,7 @@ public Response requestFileAccess(@Context ContainerRequestContext crc, @PathPar return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestAccess.failure.invalidRequest")); } - if (dataFile.containsFileAccessRequestFromUser(requestor)) { + if (dataFile.containsActiveFileAccessRequestFromUser(requestor)) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestAccess.failure.requestExists")); 
} @@ -1478,17 +1471,17 @@ public Response listFileAccessRequests(@Context ContainerRequestContext crc, @Pa return error(FORBIDDEN, BundleUtil.getStringFromBundle("access.api.rejectAccess.failure.noPermissions")); } - List requests = dataFile.getFileAccessRequests(); + List requests = dataFile.getFileAccessRequests(FileAccessRequest.RequestState.CREATED); if (requests == null || requests.isEmpty()) { List args = Arrays.asList(dataFile.getDisplayName()); - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestList.noRequestsFound", args)); + return error(Response.Status.NOT_FOUND, BundleUtil.getStringFromBundle("access.api.requestList.noRequestsFound", args)); } JsonArrayBuilder userArray = Json.createArrayBuilder(); for (FileAccessRequest fileAccessRequest : requests) { - userArray.add(json(fileAccessRequest.getAuthenticatedUser())); + userArray.add(json(fileAccessRequest.getRequester())); } return ok(userArray); @@ -1534,7 +1527,9 @@ public Response grantFileAccess(@Context ContainerRequestContext crc, @PathParam try { engineSvc.submit(new AssignRoleCommand(ra, fileDownloaderRole, dataFile, dataverseRequest, null)); - if (dataFile.removeFileAccessRequester(ra)) { + FileAccessRequest far = dataFile.getAccessRequestForAssignee(ra); + if(far!=null) { + far.setStateGranted(); dataFileService.save(dataFile); } @@ -1659,26 +1654,68 @@ public Response rejectFileAccess(@Context ContainerRequestContext crc, @PathPara if (!(dataverseRequest.getAuthenticatedUser().isSuperuser() || permissionService.requestOn(dataverseRequest, dataFile).has(Permission.ManageFilePermissions))) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.rejectAccess.failure.noPermissions")); } - - if (dataFile.removeFileAccessRequester(ra)) { + FileAccessRequest far = dataFile.getAccessRequestForAssignee(ra); + if (far != null) { + far.setStateRejected(); dataFileService.save(dataFile); try { AuthenticatedUser au = (AuthenticatedUser) ra; - userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.REJECTFILEACCESS, dataFile.getOwner().getId()); + userNotificationService.sendNotification(au, new Timestamp(new Date().getTime()), + UserNotification.Type.REJECTFILEACCESS, dataFile.getOwner().getId()); } catch (ClassCastException e) { - //nothing to do here - can only send a notification to an authenticated user + // nothing to do here - can only send a notification to an authenticated user } List args = Arrays.asList(dataFile.getDisplayName()); return ok(BundleUtil.getStringFromBundle("access.api.rejectAccess.success.for.single.file", args)); - } else { List args = Arrays.asList(dataFile.getDisplayName(), ra.getDisplayInfo().getTitle()); return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.fileAccess.rejectFailure.noRequest", args)); } } - + + @GET + @AuthRequired + @Path("/datafile/{id}/userFileAccessRequested") + public Response getUserFileAccessRequested(@Context ContainerRequestContext crc, @PathParam("id") String dataFileId) { + DataFile dataFile; + AuthenticatedUser requestAuthenticatedUser; + try { + dataFile = findDataFileOrDie(dataFileId); + requestAuthenticatedUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + boolean fileAccessRequested = false; + List requests = dataFile.getFileAccessRequests(); + for (FileAccessRequest fileAccessRequest : requests) { + if (fileAccessRequest.getRequester().getId().equals(requestAuthenticatedUser.getId())) { + 
fileAccessRequested = true; + break; + } + } + return ok(fileAccessRequested); + } + + @GET + @AuthRequired + @Path("/datafile/{id}/userPermissions") + public Response getUserPermissionsOnFile(@Context ContainerRequestContext crc, @PathParam("id") String dataFileId) { + DataFile dataFile; + try { + dataFile = findDataFileOrDie(dataFileId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); + User requestUser = getRequestUser(crc); + jsonObjectBuilder.add("canDownloadFile", permissionService.userOn(requestUser, dataFile).has(Permission.DownloadFile)); + jsonObjectBuilder.add("canManageFilePermissions", permissionService.userOn(requestUser, dataFile).has(Permission.ManageFilePermissions)); + jsonObjectBuilder.add("canEditOwnerDataset", permissionService.userOn(requestUser, dataFile.getOwner()).has(Permission.EditDataset)); + return ok(jsonObjectBuilder); + } + // checkAuthorization is a convenience method; it calls the boolean method // isAccessAuthorized(), the actual workhorse, tand throws a 403 exception if not. @@ -1945,5 +1982,5 @@ private URI handleCustomZipDownload(User user, String customZipServiceUrl, Strin throw new BadRequestException(); } return redirectUri; - } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index fd3b9a89e54..d098c2fe16a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -14,11 +14,11 @@ import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DataverseSession; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.api.auth.AuthRequired; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.validation.EMailValidator; import edu.harvard.iq.dataverse.EjbDataverseEngine; -import edu.harvard.iq.dataverse.HandlenetServiceBean; import edu.harvard.iq.dataverse.Template; import edu.harvard.iq.dataverse.TemplateServiceBean; import edu.harvard.iq.dataverse.UserServiceBean; @@ -96,8 +96,8 @@ import edu.harvard.iq.dataverse.engine.command.impl.DeleteRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteTemplateCommand; import edu.harvard.iq.dataverse.engine.command.impl.RegisterDvObjectCommand; -import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.userdata.UserListMaker; import edu.harvard.iq.dataverse.userdata.UserListResult; @@ -105,8 +105,10 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; +import java.io.FileInputStream; import java.io.IOException; import java.io.OutputStream; @@ -122,6 +124,7 @@ import jakarta.ws.rs.QueryParam; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.StreamingOutput; +import java.nio.file.Paths; /** * Where the secure, setup API calls live. 
@@ -134,46 +137,48 @@ public class Admin extends AbstractApiBean { private static final Logger logger = Logger.getLogger(Admin.class.getName()); - @EJB - AuthenticationProvidersRegistrationServiceBean authProvidersRegistrationSvc; - @EJB - BuiltinUserServiceBean builtinUserService; - @EJB - ShibServiceBean shibService; - @EJB - AuthTestDataServiceBean authTestDataService; - @EJB - UserServiceBean userService; - @EJB - IngestServiceBean ingestService; - @EJB - DataFileServiceBean fileService; - @EJB - DatasetServiceBean datasetService; - @EJB - DataverseServiceBean dataverseService; - @EJB - DatasetVersionServiceBean datasetversionService; - @Inject - DataverseRequestServiceBean dvRequestService; - @EJB - EjbDataverseEngine commandEngine; - @EJB - GroupServiceBean groupService; - @EJB - SettingsServiceBean settingsService; - @EJB - DatasetVersionServiceBean datasetVersionService; - @EJB - ExplicitGroupServiceBean explicitGroupService; - @EJB - BannerMessageServiceBean bannerMessageService; - @EJB - TemplateServiceBean templateService; - - // Make the session available - @Inject - DataverseSession session; + @EJB + AuthenticationProvidersRegistrationServiceBean authProvidersRegistrationSvc; + @EJB + BuiltinUserServiceBean builtinUserService; + @EJB + ShibServiceBean shibService; + @EJB + AuthTestDataServiceBean authTestDataService; + @EJB + UserServiceBean userService; + @EJB + IngestServiceBean ingestService; + @EJB + DataFileServiceBean fileService; + @EJB + DatasetServiceBean datasetService; + @EJB + DataverseServiceBean dataverseService; + @EJB + DvObjectServiceBean dvObjectService; + @EJB + DatasetVersionServiceBean datasetversionService; + @Inject + DataverseRequestServiceBean dvRequestService; + @EJB + EjbDataverseEngine commandEngine; + @EJB + GroupServiceBean groupService; + @EJB + SettingsServiceBean settingsService; + @EJB + DatasetVersionServiceBean datasetVersionService; + @EJB + ExplicitGroupServiceBean explicitGroupService; + @EJB + BannerMessageServiceBean bannerMessageService; + @EJB + TemplateServiceBean templateService; + + // Make the session available + @Inject + DataverseSession session; public static final String listUsersPartialAPIPath = "list-users"; public static final String listUsersFullAPIPath = "/api/admin/" + listUsersPartialAPIPath; @@ -1472,10 +1477,7 @@ public Response isOrcidEnabled() { public Response reregisterHdlToPID(@Context ContainerRequestContext crc, @PathParam("id") String id) { logger.info("Starting to reregister " + id + " Dataset Id. 
(from hdl to doi)" + new Date()); try { - if (settingsSvc.get(SettingsServiceBean.Key.Protocol.toString()).equals(HandlenetServiceBean.HDL_PROTOCOL)) { - logger.info("Bad Request protocol set to handle " ); - return error(Status.BAD_REQUEST, BundleUtil.getStringFromBundle("admin.api.migrateHDL.failure.must.be.set.for.doi")); - } + User u = getRequestUser(crc); if (!u.isSuperuser()) { @@ -1485,7 +1487,12 @@ public Response reregisterHdlToPID(@Context ContainerRequestContext crc, @PathPa DataverseRequest r = createDataverseRequest(u); Dataset ds = findDatasetOrDie(id); - if (ds.getIdentifier() != null && !ds.getIdentifier().isEmpty() && ds.getProtocol().equals(HandlenetServiceBean.HDL_PROTOCOL)) { + + if (HandlePidProvider.HDL_PROTOCOL.equals(dvObjectService.getEffectivePidGenerator(ds).getProtocol())) { + logger.info("Bad Request protocol set to handle " ); + return error(Status.BAD_REQUEST, BundleUtil.getStringFromBundle("admin.api.migrateHDL.failure.must.be.set.for.doi")); + } + if (ds.getIdentifier() != null && !ds.getIdentifier().isEmpty() && ds.getProtocol().equals(HandlePidProvider.HDL_PROTOCOL)) { execCommand(new RegisterDvObjectCommand(r, ds, true)); } else { return error(Status.BAD_REQUEST, BundleUtil.getStringFromBundle("admin.api.migrateHDL.failure.must.be.hdl.dataset")); @@ -2418,12 +2425,60 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur } String baseUrl = urlInfo.getString("url"); - int timeout = urlInfo.getInt(ExternalToolHandler.TIMEOUT, 10); - String method = urlInfo.getString(ExternalToolHandler.HTTP_METHOD, "GET"); + int timeout = urlInfo.getInt(URLTokenUtil.TIMEOUT, 10); + String method = urlInfo.getString(URLTokenUtil.HTTP_METHOD, "GET"); String signedUrl = UrlSignerUtil.signUrl(baseUrl, timeout, userId, method, key); - return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl)); + return ok(Json.createObjectBuilder().add(URLTokenUtil.SIGNED_URL, signedUrl)); } + @DELETE + @Path("/clearThumbnailFailureFlag") + public Response clearThumbnailFailureFlag() { + em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE").executeUpdate(); + return ok("Thumbnail Failure Flags cleared."); + } + + @DELETE + @Path("/clearThumbnailFailureFlag/{id}") + public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String fileId) { + try { + DataFile df = findDataFileOrDie(fileId); + Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE where id = ?"); + deleteQuery.setParameter(1, df.getId()); + deleteQuery.executeUpdate(); + return ok("Thumbnail Failure Flag cleared for file id=: " + df.getId() + "."); + } catch (WrappedResponse r) { + logger.info("Could not find file with the id: " + fileId); + return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId); + } + } + + /** + * For testing only. Download a file from /tmp. 
+ */ + @GET + @AuthRequired + @Path("/downloadTmpFile") + public Response downloadTmpFile(@Context ContainerRequestContext crc, @QueryParam("fullyQualifiedPathToFile") String fullyQualifiedPathToFile) { + try { + AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc); + if (!user.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + java.nio.file.Path normalizedPath = Paths.get(fullyQualifiedPathToFile).normalize(); + if (!normalizedPath.toString().startsWith("/tmp")) { + return error(Status.BAD_REQUEST, "Path must begin with '/tmp' but after normalization was '" + normalizedPath +"'."); + } + try { + return ok(new FileInputStream(fullyQualifiedPathToFile)); + } catch (IOException ex) { + return error(Status.BAD_REQUEST, ex.toString()); + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java index 296869762da..347a8946a46 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java @@ -12,4 +12,9 @@ private ApiConstants() { // Authentication public static final String CONTAINER_REQUEST_CONTEXT_USER = "user"; + + // Dataset + public static final String DS_VERSION_LATEST = ":latest"; + public static final String DS_VERSION_DRAFT = ":draft"; + public static final String DS_VERSION_LATEST_PUBLISHED = ":latest-published"; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index dbea63cb1c8..ad66fb468f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1,9 +1,11 @@ package edu.harvard.iq.dataverse.api; +import com.amazonaws.services.s3.model.PartETag; import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; import edu.harvard.iq.dataverse.api.auth.AuthRequired; +import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; @@ -13,6 +15,7 @@ import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.dataaccess.*; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; import edu.harvard.iq.dataverse.datacapturemodule.ScriptRequestResponse; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; @@ -23,141 +26,72 @@ import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand; -import edu.harvard.iq.dataverse.engine.command.impl.AddLockCommand; -import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreatePrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CuratePublishedDatasetVersionCommand; -import 
edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetLinkingDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeletePrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.FinalizeDatasetPublicationCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetPrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ImportFromFileSystemCommand; -import edu.harvard.iq.dataverse.engine.command.impl.LinkDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListRoleAssignments; -import edu.harvard.iq.dataverse.engine.command.impl.ListVersionsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.MoveDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetResult; -import edu.harvard.iq.dataverse.engine.command.impl.RemoveLockCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RequestRsyncScriptCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ReturnDatasetToAuthorCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SetDatasetCitationDateCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SetCurationStatusCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SubmitDatasetForReviewCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetTargetURLCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; +import edu.harvard.iq.dataverse.engine.command.impl.*; import edu.harvard.iq.dataverse.export.DDIExportServiceBean; import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.globus.GlobusUtil; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.privateurl.PrivateUrl; -import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; -import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; -import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; -import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand; -import 
edu.harvard.iq.dataverse.engine.command.impl.UpdateDvObjectPIDMetadataCommand; -import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitations; -import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitationsServiceBean; -import edu.harvard.iq.dataverse.makedatacount.DatasetMetrics; -import edu.harvard.iq.dataverse.makedatacount.DatasetMetricsServiceBean; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; +import edu.harvard.iq.dataverse.makedatacount.*; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.metrics.MetricsUtil; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.ArchiverUtil; -import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.EjbUtil; -import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.MarkupChecker; -import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.*; import edu.harvard.iq.dataverse.util.bagit.OREMap; -import edu.harvard.iq.dataverse.util.json.JSONLDUtil; -import edu.harvard.iq.dataverse.util.json.JsonLDTerm; -import edu.harvard.iq.dataverse.util.json.JsonParseException; -import edu.harvard.iq.dataverse.util.SignpostingResources; -import edu.harvard.iq.dataverse.util.json.JsonUtil; -import edu.harvard.iq.dataverse.search.IndexServiceBean; - -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; +import edu.harvard.iq.dataverse.util.json.*; import edu.harvard.iq.dataverse.workflow.Workflow; import edu.harvard.iq.dataverse.workflow.WorkflowContext; -import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; - -import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; +import jakarta.ejb.EJB; +import jakarta.ejb.EJBException; +import jakarta.inject.Inject; +import jakarta.json.*; +import jakarta.json.stream.JsonParsingException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import jakarta.ws.rs.*; +import jakarta.ws.rs.container.ContainerRequestContext; +import jakarta.ws.rs.core.*; +import jakarta.ws.rs.core.Response.Status; +import org.apache.commons.lang3.StringUtils; +import org.glassfish.jersey.media.multipart.FormDataBodyPart; +import org.glassfish.jersey.media.multipart.FormDataContentDisposition; +import org.glassfish.jersey.media.multipart.FormDataParam; import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.net.URI; import java.sql.Timestamp; import java.text.MessageFormat; import java.text.SimpleDateFormat; import java.time.LocalDate; import java.time.LocalDateTime; -import java.util.*; -import java.util.concurrent.*; -import java.util.function.Predicate; import java.time.ZoneId; import java.time.format.DateTimeFormatter; +import java.util.*; import 
java.util.Map.Entry; +import java.util.concurrent.ExecutionException; +import java.util.function.Predicate; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; import java.util.stream.Collectors; -import jakarta.ejb.EJB; -import jakarta.ejb.EJBException; -import jakarta.inject.Inject; -import jakarta.json.*; -import jakarta.json.stream.JsonParsingException; -import jakarta.servlet.http.HttpServletRequest; -import jakarta.servlet.http.HttpServletResponse; -import jakarta.ws.rs.BadRequestException; -import jakarta.ws.rs.Consumes; -import jakarta.ws.rs.DELETE; -import jakarta.ws.rs.DefaultValue; -import jakarta.ws.rs.GET; -import jakarta.ws.rs.NotAcceptableException; -import jakarta.ws.rs.POST; -import jakarta.ws.rs.PUT; -import jakarta.ws.rs.Path; -import jakarta.ws.rs.PathParam; -import jakarta.ws.rs.Produces; -import jakarta.ws.rs.QueryParam; -import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.*; -import jakarta.ws.rs.core.Response.Status; +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; -import org.apache.commons.lang3.StringUtils; -import org.glassfish.jersey.media.multipart.FormDataBodyPart; -import org.glassfish.jersey.media.multipart.FormDataContentDisposition; -import org.glassfish.jersey.media.multipart.FormDataParam; -import com.amazonaws.services.s3.model.PartETag; -import edu.harvard.iq.dataverse.settings.JvmSettings; - @Path("datasets") public class Datasets extends AbstractApiBean { @@ -236,6 +170,9 @@ public class Datasets extends AbstractApiBean { @Inject PrivateUrlServiceBean privateUrlService; + @Inject + DatasetVersionFilesServiceBean datasetVersionFilesServiceBean; + /** * Used to consolidate the way we parse and handle dataset versions. * @param @@ -250,29 +187,27 @@ public interface DsVersionHandler { @GET @AuthRequired @Path("{id}") - public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { + public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, @QueryParam("returnOwners") boolean returnOwners) { return response( req -> { final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(id))); final DatasetVersion latest = execCommand(new GetLatestAccessibleDatasetVersionCommand(req, retrieved)); - final JsonObjectBuilder jsonbuilder = json(retrieved); + final JsonObjectBuilder jsonbuilder = json(retrieved, returnOwners); //Report MDC if this is a released version (could be draft if user has access, or user may not have access at all and is not getting metadata beyond the minimum) if((latest != null) && latest.isReleased()) { MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, retrieved); mdcLogService.logEntry(entry); } - return ok(jsonbuilder.add("latestVersion", (latest != null) ? json(latest) : null)); + return ok(jsonbuilder.add("latestVersion", (latest != null) ? 
json(latest, true) : null)); }, getRequestUser(crc)); } - // TODO: // This API call should, ideally, call findUserOrDie() and the GetDatasetCommand // to obtain the dataset that we are trying to export - which would handle // Auth in the process... For now, Auth isn't necessary - since export ONLY // WORKS on published datasets, which are open to the world. -- L.A. 4.5 - @GET @Path("/export") - @Produces({"application/xml", "application/json", "application/html" }) + @Produces({"application/xml", "application/json", "application/html", "application/ld+json" }) public Response exportDataset(@QueryParam("persistentId") String persistentId, @QueryParam("exporter") String exporter, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { try { @@ -387,8 +322,8 @@ public Response destroyDataset(@Context ContainerRequestContext crc, @PathParam( @AuthRequired @Path("{id}/versions/{versionId}") public Response deleteDraftVersion(@Context ContainerRequestContext crc, @PathParam("id") String id, @PathParam("versionId") String versionId ){ - if ( ! ":draft".equals(versionId) ) { - return badRequest("Only the :draft version can be deleted"); + if (!DS_VERSION_DRAFT.equals(versionId)) { + return badRequest("Only the " + DS_VERSION_DRAFT + " version can be deleted"); } return response( req -> { @@ -466,33 +401,130 @@ public Response useDefaultCitationDate(@Context ContainerRequestContext crc, @Pa @GET @AuthRequired @Path("{id}/versions") - public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id ) { - return response( req -> - ok( execCommand( new ListVersionsCommand(req, findDatasetOrDie(id)) ) + public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id, @QueryParam("excludeFiles") Boolean excludeFiles, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset) { + + return response( req -> { + Dataset dataset = findDatasetOrDie(id); + Boolean deepLookup = excludeFiles == null ? true : !excludeFiles; + + return ok( execCommand( new ListVersionsCommand(req, dataset, offset, limit, deepLookup) ) .stream() - .map( d -> json(d) ) - .collect(toJsonArray())), getRequestUser(crc)); + .map( d -> json(d, deepLookup) ) + .collect(toJsonArray())); + }, getRequestUser(crc)); } @GET @AuthRequired @Path("{id}/versions/{versionId}") - public Response getVersion(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + public Response getVersion(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @QueryParam("excludeFiles") Boolean excludeFiles, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @QueryParam("returnOwners") boolean returnOwners, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { return response( req -> { - DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); - return (dsv == null || dsv.getId() == null) ? notFound("Dataset version not found") - : ok(json(dsv)); + + + //If excludeFiles is null the default is to provide the files and because of this we need to check permissions. + boolean checkPerms = excludeFiles == null ? 
true : !excludeFiles; + + Dataset dst = findDatasetOrDie(datasetId); + DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, dst, uriInfo, headers, includeDeaccessioned, checkPerms); + + if (dsv == null || dsv.getId() == null) { + return notFound("Dataset version not found"); + } + + if (excludeFiles == null ? true : !excludeFiles) { + dsv = datasetversionService.findDeep(dsv.getId()); + } + return ok(json(dsv, null, excludeFiles == null ? true : !excludeFiles, returnOwners)); }, getRequestUser(crc)); } - + @GET @AuthRequired @Path("{id}/versions/{versionId}/files") - public Response getVersionFiles(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return response( req -> ok( jsonFileMetadatas( - getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getFileMetadatas())), getRequestUser(crc)); + public Response getVersionFiles(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @QueryParam("limit") Integer limit, + @QueryParam("offset") Integer offset, + @QueryParam("contentType") String contentType, + @QueryParam("accessStatus") String accessStatus, + @QueryParam("categoryName") String categoryName, + @QueryParam("tabularTagName") String tabularTagName, + @QueryParam("searchText") String searchText, + @QueryParam("orderCriteria") String orderCriteria, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + return response(req -> { + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned); + DatasetVersionFilesServiceBean.FileOrderCriteria fileOrderCriteria; + try { + fileOrderCriteria = orderCriteria != null ? DatasetVersionFilesServiceBean.FileOrderCriteria.valueOf(orderCriteria) : DatasetVersionFilesServiceBean.FileOrderCriteria.NameAZ; + } catch (IllegalArgumentException e) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.order.criteria", List.of(orderCriteria))); + } + FileSearchCriteria fileSearchCriteria; + try { + fileSearchCriteria = new FileSearchCriteria( + contentType, + accessStatus != null ? 
FileSearchCriteria.FileAccessStatus.valueOf(accessStatus) : null, + categoryName, + tabularTagName, + searchText + ); + } catch (IllegalArgumentException e) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); + } + return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria)), + datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion, fileSearchCriteria)); + }, getRequestUser(crc)); } - + + @GET + @AuthRequired + @Path("{id}/versions/{versionId}/files/counts") + public Response getVersionFileCounts(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @QueryParam("contentType") String contentType, + @QueryParam("accessStatus") String accessStatus, + @QueryParam("categoryName") String categoryName, + @QueryParam("tabularTagName") String tabularTagName, + @QueryParam("searchText") String searchText, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + return response(req -> { + FileSearchCriteria fileSearchCriteria; + try { + fileSearchCriteria = new FileSearchCriteria( + contentType, + accessStatus != null ? FileSearchCriteria.FileAccessStatus.valueOf(accessStatus) : null, + categoryName, + tabularTagName, + searchText + ); + } catch (IllegalArgumentException e) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); + } + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned); + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); + jsonObjectBuilder.add("total", datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion, fileSearchCriteria)); + jsonObjectBuilder.add("perContentType", json(datasetVersionFilesServiceBean.getFileMetadataCountPerContentType(datasetVersion, fileSearchCriteria))); + jsonObjectBuilder.add("perCategoryName", json(datasetVersionFilesServiceBean.getFileMetadataCountPerCategoryName(datasetVersion, fileSearchCriteria))); + jsonObjectBuilder.add("perTabularTagName", jsonFileCountPerTabularTagNameMap(datasetVersionFilesServiceBean.getFileMetadataCountPerTabularTagName(datasetVersion, fileSearchCriteria))); + jsonObjectBuilder.add("perAccessStatus", jsonFileCountPerAccessStatusMap(datasetVersionFilesServiceBean.getFileMetadataCountPerAccessStatus(datasetVersion, fileSearchCriteria))); + return ok(jsonObjectBuilder); + }, getRequestUser(crc)); + } + @GET @AuthRequired @Path("{id}/dirindex") @@ -500,7 +532,7 @@ public Response getVersionFiles(@Context ContainerRequestContext crc, @PathParam public Response getFileAccessFolderView(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @QueryParam("version") String versionId, @QueryParam("folder") String folderName, @QueryParam("original") Boolean originals, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { folderName = folderName == null ? "" : folderName; - versionId = versionId == null ? ":latest-published" : versionId; + versionId = versionId == null ? 
DS_VERSION_LATEST_PUBLISHED : versionId; DatasetVersion version; try { @@ -574,24 +606,26 @@ public Response getVersionMetadataBlock(@Context ContainerRequestContext crc, @GET @AuthRequired @Path("{id}/versions/{versionId}/linkset") - public Response getLinkset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - if ( ":draft".equals(versionId) ) { - return badRequest("Signposting is not supported on the :draft version"); + public Response getLinkset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, + @Context UriInfo uriInfo, @Context HttpHeaders headers) { + if (DS_VERSION_DRAFT.equals(versionId)) { + return badRequest("Signposting is not supported on the " + DS_VERSION_DRAFT + " version"); } - User user = getRequestUser(crc); - return response(req -> { + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); + try { DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); - return ok(Json.createObjectBuilder().add( - "linkset", - new SignpostingResources( - systemConfig, - dsv, - JvmSettings.SIGNPOSTING_LEVEL1_AUTHOR_LIMIT.lookupOptional().orElse(""), - JvmSettings.SIGNPOSTING_LEVEL1_ITEM_LIMIT.lookupOptional().orElse("") - ).getJsonLinkset() - ) - ); - }, user); + return Response + .ok(Json.createObjectBuilder() + .add("linkset", + new SignpostingResources(systemConfig, dsv, + JvmSettings.SIGNPOSTING_LEVEL1_AUTHOR_LIMIT.lookupOptional().orElse(""), + JvmSettings.SIGNPOSTING_LEVEL1_ITEM_LIMIT.lookupOptional().orElse("")) + .getJsonLinkset()) + .build()) + .type(MediaType.APPLICATION_JSON).build(); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } } @GET @@ -661,16 +695,15 @@ public Response updateDatasetPIDMetadataAll(@Context ContainerRequestContext crc @AuthRequired @Path("{id}/versions/{versionId}") @Consumes(MediaType.APPLICATION_JSON) - public Response updateDraftVersion(@Context ContainerRequestContext crc, String jsonBody, @PathParam("id") String id, @PathParam("versionId") String versionId){ - - if ( ! ":draft".equals(versionId) ) { - return error( Response.Status.BAD_REQUEST, "Only the :draft version can be updated"); + public Response updateDraftVersion(@Context ContainerRequestContext crc, String jsonBody, @PathParam("id") String id, @PathParam("versionId") String versionId) { + if (!DS_VERSION_DRAFT.equals(versionId)) { + return error( Response.Status.BAD_REQUEST, "Only the " + DS_VERSION_DRAFT + " version can be updated"); } - try ( StringReader rdr = new StringReader(jsonBody) ) { + try { DataverseRequest req = createDataverseRequest(getRequestUser(crc)); Dataset ds = findDatasetOrDie(id); - JsonObject json = Json.createReader(rdr).readObject(); + JsonObject json = JsonUtil.getJsonObject(jsonBody); DatasetVersion incomingVersion = jsonParser().parseDatasetVersion(json); // clear possibly stale fields from the incoming dataset version. 
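updateDraftVersion above, like several later hunks, swaps the open-coded StringReader/Json.createReader boilerplate for JsonUtil.getJsonObject(jsonBody). As a rough illustration of what such a helper amounts to (a sketch only; the project's actual JsonUtil may differ), a minimal equivalent is:

    import jakarta.json.Json;
    import jakarta.json.JsonObject;
    import jakarta.json.JsonReader;
    import java.io.StringReader;

    public final class JsonObjectParsingSketch {
        // Illustrative stand-in for JsonUtil.getJsonObject(String): parse a JSON object
        // from a String, closing the reader via try-with-resources so nothing is leaked.
        public static JsonObject getJsonObject(String serialized) {
            try (JsonReader reader = Json.createReader(new StringReader(serialized))) {
                return reader.readObject();
            }
        }

        public static void main(String[] args) {
            JsonObject json = getJsonObject("{\"reasonForReturn\": \"Needs a README\"}");
            System.out.println(json.getString("reasonForReturn"));
        }
    }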
@@ -708,7 +741,7 @@ public Response updateDraftVersion(@Context ContainerRequestContext crc, String } managedVersion = execCommand(new CreateDatasetVersionCommand(req, ds, incomingVersion)); } - return ok( json(managedVersion) ); + return ok( json(managedVersion, true) ); } catch (JsonParseException ex) { logger.log(Level.SEVERE, "Semantic error parsing dataset version Json: " + ex.getMessage(), ex); @@ -747,7 +780,7 @@ public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @ @Path("{id}/metadata") @Produces("application/ld+json, application/json-ld") public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return getVersionJsonLDMetadata(crc, id, ":draft", uriInfo, headers); + return getVersionJsonLDMetadata(crc, id, DS_VERSION_LATEST, uriInfo, headers); } @PUT @@ -826,10 +859,10 @@ public Response deleteVersionMetadata(@Context ContainerRequestContext crc, Stri } private Response processDatasetFieldDataDelete(String jsonBody, String id, DataverseRequest req) { - try (StringReader rdr = new StringReader(jsonBody)) { + try { Dataset ds = findDatasetOrDie(id); - JsonObject json = Json.createReader(rdr).readObject(); + JsonObject json = JsonUtil.getJsonObject(jsonBody); //Get the current draft or create a new version to update DatasetVersion dsv = ds.getOrCreateEditVersion(); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); @@ -943,7 +976,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav DatasetVersion managedVersion = execCommand(new UpdateDatasetVersionCommand(ds, req)).getLatestVersion(); - return ok(json(managedVersion)); + return ok(json(managedVersion, true)); } catch (JsonParseException ex) { logger.log(Level.SEVERE, "Semantic error parsing dataset update Json: " + ex.getMessage(), ex); @@ -983,10 +1016,10 @@ public Response editVersionMetadata(@Context ContainerRequestContext crc, String private Response processDatasetUpdate(String jsonBody, String id, DataverseRequest req, Boolean replaceData){ - try (StringReader rdr = new StringReader(jsonBody)) { + try { Dataset ds = findDatasetOrDie(id); - JsonObject json = Json.createReader(rdr).readObject(); + JsonObject json = JsonUtil.getJsonObject(jsonBody); //Get the current draft or create a new version to update DatasetVersion dsv = ds.getOrCreateEditVersion(); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); @@ -1092,7 +1125,7 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque } DatasetVersion managedVersion = execCommand(new UpdateDatasetVersionCommand(ds, req)).getLatestVersion(); - return ok(json(managedVersion)); + return ok(json(managedVersion, true)); } catch (JsonParseException ex) { logger.log(Level.SEVERE, "Semantic error parsing dataset update Json: " + ex.getMessage(), ex); @@ -1433,8 +1466,7 @@ public Response createFileEmbargo(@Context ContainerRequestContext crc, @PathPar return error(Status.BAD_REQUEST, "No Embargoes allowed"); } - StringReader rdr = new StringReader(jsonBody); - JsonObject json = Json.createReader(rdr).readObject(); + JsonObject json = JsonUtil.getJsonObject(jsonBody); Embargo embargo = new Embargo(); @@ -1577,8 +1609,7 @@ public Response removeFileEmbargo(@Context ContainerRequestContext crc, @PathPar return error(Status.BAD_REQUEST, "No Embargoes allowed"); } - StringReader rdr = new StringReader(jsonBody); - JsonObject json = Json.createReader(rdr).readObject(); + JsonObject json = 
JsonUtil.getJsonObject(jsonBody); List datasetFiles = dataset.getFiles(); List embargoFilesToUnset = new LinkedList<>(); @@ -1681,7 +1712,7 @@ public Response getCustomTermsTab(@PathParam("id") String id, @PathParam("versio return error(Status.NOT_FOUND, "This Dataset has no custom license"); } persistentId = getRequestParameter(":persistentId".substring(1)); - if (versionId.equals(":draft")) { + if (versionId.equals(DS_VERSION_DRAFT)) { versionId = "DRAFT"; } } catch (WrappedResponse wrappedResponse) { @@ -1880,6 +1911,22 @@ public Response getDatasetThumbnail(@PathParam("id") String idSupplied) { } } + @GET + @Produces({ "image/png" }) + @Path("{id}/logo") + public Response getDatasetLogo(@PathParam("id") String idSupplied) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); + InputStream is = DatasetUtil.getLogoAsInputStream(dataset); + if (is == null) { + return notFound("Logo not available"); + } + return Response.ok(is).build(); + } catch (WrappedResponse wr) { + return notFound("Logo not available"); + } + } + // TODO: Rather than only supporting looking up files by their database IDs (dataFileIdSupplied), consider supporting persistent identifiers. @POST @AuthRequired @@ -2093,15 +2140,13 @@ public Response returnToAuthor(@Context ContainerRequestContext crc, @PathParam( if (jsonBody == null || jsonBody.isEmpty()) { return error(Response.Status.BAD_REQUEST, "You must supply JSON to this API endpoint and it must contain a reason for returning the dataset (field: reasonForReturn)."); } - StringReader rdr = new StringReader(jsonBody); - JsonObject json = Json.createReader(rdr).readObject(); + JsonObject json = JsonUtil.getJsonObject(jsonBody); try { Dataset dataset = findDatasetOrDie(idSupplied); String reasonForReturn = null; reasonForReturn = json.getString("reasonForReturn"); - // TODO: Once we add a box for the curator to type into, pass the reason for return to the ReturnDatasetToAuthorCommand and delete this check and call to setReturnReason on the API side. 
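The new GET /api/datasets/{id}/logo endpoint above streams the dataset logo as image/png. A minimal client-side sketch, assuming a placeholder base URL and dataset id (neither is part of this patch):

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;
    import java.nio.file.Path;

    public class DatasetLogoClientSketch {
        public static void main(String[] args) throws Exception {
            String baseUrl = "https://dataverse.example.org"; // placeholder installation URL
            long datasetId = 42;                              // placeholder database id
            HttpClient client = HttpClient.newHttpClient();
            HttpRequest request = HttpRequest.newBuilder(
                    URI.create(baseUrl + "/api/datasets/" + datasetId + "/logo")).GET().build();
            // On success the PNG bytes are written to disk; an error body would be written
            // too, so check the status code before using the file.
            HttpResponse<Path> response = client.send(request,
                    HttpResponse.BodyHandlers.ofFile(Path.of("dataset-logo.png")));
            System.out.println("HTTP " + response.statusCode() + " -> " + response.body());
        }
    }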
if (reasonForReturn == null || reasonForReturn.isEmpty()) { - return error(Response.Status.BAD_REQUEST, "You must enter a reason for returning a dataset to the author(s)."); + return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("dataset.reject.datasetNotInReview")); } AuthenticatedUser authenticatedUser = getRequestAuthenticatedUserOrDie(crc); Dataset updatedDataset = execCommand(new ReturnDatasetToAuthorCommand(createDataverseRequest(authenticatedUser), dataset, reasonForReturn )); @@ -2346,9 +2391,7 @@ public Response completeMPUpload(@Context ContainerRequestContext crc, String pa List eTagList = new ArrayList(); logger.info("Etags: " + partETagBody); try { - JsonReader jsonReader = Json.createReader(new StringReader(partETagBody)); - JsonObject object = jsonReader.readObject(); - jsonReader.close(); + JsonObject object = JsonUtil.getJsonObject(partETagBody); for (String partNo : object.keySet()) { eTagList.add(new PartETag(Integer.parseInt(partNo), object.getString(partNo))); } @@ -2642,11 +2685,11 @@ private void msgt(String m) { public static T handleVersion(String versionId, DsVersionHandler hdl) throws WrappedResponse { switch (versionId) { - case ":latest": + case DS_VERSION_LATEST: return hdl.handleLatest(); - case ":draft": + case DS_VERSION_DRAFT: return hdl.handleDraft(); - case ":latest-published": + case DS_VERSION_LATEST_PUBLISHED: return hdl.handleLatestPublished(); default: try { @@ -2665,29 +2708,27 @@ public static T handleVersion(String versionId, DsVersionHandler hdl) } } + /* + * includeDeaccessioned defaults to false and checkPermsWhenDeaccessioned to false. Use it only when you are sure that you don't need to work with + * a deaccessioned dataset. + */ private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers) throws WrappedResponse { - DatasetVersion dsv = execCommand(handleVersion(versionNumber, new DsVersionHandler>() { - - @Override - public Command handleLatest() { - return new GetLatestAccessibleDatasetVersionCommand(req, ds); - } - - @Override - public Command handleDraft() { - return new GetDraftDatasetVersionCommand(req, ds); - } - - @Override - public Command handleSpecific(long major, long minor) { - return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor); - } + //The checkPerms was added to check the permissions ONLY when the dataset is deaccessioned. + return getDatasetVersionOrDie(req, versionNumber, ds, uriInfo, headers, false, false); + } + + /* + * checkPermsWhenDeaccessioned defaults to true. Be aware that the version will only be obtainable if the user has edit permissions. + */ + private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers, boolean includeDeaccessioned) throws WrappedResponse{ + return getDatasetVersionOrDie(req, versionNumber, ds, uriInfo, headers, includeDeaccessioned, true); + } - @Override - public Command handleLatestPublished() { - return new GetLatestPublishedDatasetVersionCommand(req, ds); - } - })); + /* + * Allows defining when the permissions should be checked when a deaccessioned dataset is requested. If the user doesn't have edit permissions, this will result in an error.
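handleVersion above resolves the symbolic version identifiers (now the DS_VERSION_* constants for :draft, :latest and :latest-published) as well as numeric labels such as 1.0. A hedged sketch of how a client might request a version with the new excludeFiles and includeDeaccessioned query parameters; the base URL, API token and ids are placeholders:

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class GetDatasetVersionSketch {
        public static void main(String[] args) throws Exception {
            String baseUrl = "https://dataverse.example.org";               // placeholder
            String apiToken = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";       // placeholder
            String datasetId = "42";                                        // placeholder database id
            String version = ":latest-published";                           // or ":draft", ":latest", "1.0", ...
            HttpRequest request = HttpRequest.newBuilder(URI.create(
                    baseUrl + "/api/datasets/" + datasetId + "/versions/" + version
                            + "?excludeFiles=true&includeDeaccessioned=false"))
                    .header("X-Dataverse-key", apiToken)
                    .GET().build();
            // Prints the JSON for the resolved version; file metadata is omitted because
            // excludeFiles=true was requested.
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            System.out.println(response.body());
        }
    }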
+ */ + private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned) throws WrappedResponse { + DatasetVersion dsv = findDatasetVersionOrDie(req, versionNumber, ds, includeDeaccessioned, checkPermsWhenDeaccessioned); if (dsv == null || dsv.getId() == null) { throw new WrappedResponse(notFound("Dataset version " + versionNumber + " of dataset " + ds.getId() + " not found")); } @@ -2898,25 +2939,58 @@ public Response getMakeDataCountMetricCurrentMonth(@PathParam("id") String idSup String nullCurrentMonth = null; return getMakeDataCountMetric(idSupplied, metricSupplied, nullCurrentMonth, country); } - + @GET @AuthRequired @Path("{identifier}/storagesize") - public Response getStorageSize(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, @QueryParam("includeCached") boolean includeCached, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + public Response getStorageSize(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, @QueryParam("includeCached") boolean includeCached) { return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.storage"), execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), includeCached, GetDatasetStorageSizeCommand.Mode.STORAGE, null)))), getRequestUser(crc)); } - + @GET @AuthRequired @Path("{identifier}/versions/{versionId}/downloadsize") - public Response getDownloadSize(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, @PathParam("versionId") String version, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + public Response getDownloadSize(@Context ContainerRequestContext crc, + @PathParam("identifier") String dvIdtf, + @PathParam("versionId") String version, + @QueryParam("contentType") String contentType, + @QueryParam("accessStatus") String accessStatus, + @QueryParam("categoryName") String categoryName, + @QueryParam("tabularTagName") String tabularTagName, + @QueryParam("searchText") String searchText, + @QueryParam("mode") String mode, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { - return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.download"), - execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), false, GetDatasetStorageSizeCommand.Mode.DOWNLOAD, getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers))))), getRequestUser(crc)); + return response(req -> { + FileSearchCriteria fileSearchCriteria; + try { + fileSearchCriteria = new FileSearchCriteria( + contentType, + accessStatus != null ? FileSearchCriteria.FileAccessStatus.valueOf(accessStatus) : null, + categoryName, + tabularTagName, + searchText + ); + } catch (IllegalArgumentException e) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); + } + DatasetVersionFilesServiceBean.FileDownloadSizeMode fileDownloadSizeMode; + try { + fileDownloadSizeMode = mode != null ? 
DatasetVersionFilesServiceBean.FileDownloadSizeMode.valueOf(mode) : DatasetVersionFilesServiceBean.FileDownloadSizeMode.All; + } catch (IllegalArgumentException e) { + return error(Response.Status.BAD_REQUEST, "Invalid mode: " + mode); + } + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers, includeDeaccessioned); + long datasetStorageSize = datasetVersionFilesServiceBean.getFilesDownloadSize(datasetVersion, fileSearchCriteria, fileDownloadSizeMode); + String message = MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.download"), datasetStorageSize); + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); + jsonObjectBuilder.add("message", message); + jsonObjectBuilder.add("storageSize", datasetStorageSize); + return ok(jsonObjectBuilder); + }, getRequestUser(crc)); } @GET @@ -3317,15 +3391,246 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam(" } +/**************************** + * Globus Support Section: + * + * Globus transfer in (upload) and out (download) involve three basic steps: The + * app is launched and makes a callback to the + * globusUploadParameters/globusDownloadParameters method to get all of the info + * needed to set up its display. + * + * At some point after that, the user will make a selection as to which files to + * transfer and the app will call requestGlobusUploadPaths/requestGlobusDownload + * to indicate a transfer is about to start. In addition to providing the + * details of where to transfer the files to/from, Dataverse also grants the + * Globus principal involved the relevant rw or r permission for the dataset. + * + * Once the transfer is started, the app records the task id and sends it to + * Dataverse in the addGlobusFiles/monitorGlobusDownload call. Dataverse then + * monitors the transfer task and when it ultimately succeeds or fails it + * revokes the principal's permission and, for the transfer-in case, adds the + * files to the dataset. (The dataset is locked until the transfer completes.) + * + * (If no transfer is started within a specified timeout, permissions will + * automatically be revoked - see the GlobusServiceBean for details.) + * + * The option to reference a file at a remote endpoint (rather than transfer it) + * follows the first two steps of the process above but completes with a call to + * the normal /addFiles endpoint (as there is no transfer to monitor and the + * files can be added to the dataset immediately.) + */ + + /** + * Retrieve the parameters and signed URLs required to perform a globus + * transfer. This API endpoint is expected to be called as a signed callback + * after the globus-dataverse app/other app is launched, but it will accept + * other forms of authentication.
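As a sketch of the first step described above, the app (or any caller with an API token) fetches the upload parameters for a dataset; all values below are placeholders:

    import jakarta.json.Json;
    import jakarta.json.JsonObject;
    import jakarta.json.JsonReader;
    import java.io.StringReader;
    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class GlobusUploadParametersSketch {
        public static void main(String[] args) throws Exception {
            String baseUrl = "https://dataverse.example.org";             // placeholder
            String apiToken = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";     // placeholder
            String datasetId = "42";                                      // placeholder
            HttpRequest request = HttpRequest.newBuilder(URI.create(
                    baseUrl + "/api/datasets/" + datasetId + "/globusUploadParameters?locale=en"))
                    .header("X-Dataverse-key", apiToken)
                    .GET().build();
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            // The payload bundles the substituted query parameters plus the signed
            // "allowed API calls" the app may make next (requestGlobusUploadPaths, etc.).
            try (JsonReader reader = Json.createReader(new StringReader(response.body()))) {
                JsonObject envelope = reader.readObject();
                System.out.println(envelope);
            }
        }
    }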
+ * + * @param crc + * @param datasetId + */ + @GET + @AuthRequired + @Path("{id}/globusUploadParameters") + @Produces(MediaType.APPLICATION_JSON) + public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, + @QueryParam(value = "locale") String locale) { + // ------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse e) { + return e.getResponse(); + } + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + String storeId = dataset.getEffectiveStorageDriverId(); + // acceptsGlobusTransfers should only be true for an S3 or globus store + if (!GlobusAccessibleStore.acceptsGlobusTransfers(storeId) + && !GlobusAccessibleStore.allowsGlobusReferences(storeId)) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); + } + + URLTokenUtil tokenUtil = new URLTokenUtil(dataset, authSvc.findApiTokenByUser(authUser), locale); + + boolean managed = GlobusAccessibleStore.isDataverseManaged(storeId); + String transferEndpoint = null; + JsonArray referenceEndpointsWithPaths = null; + if (managed) { + transferEndpoint = GlobusAccessibleStore.getTransferEndpointId(storeId); + } else { + referenceEndpointsWithPaths = GlobusAccessibleStore.getReferenceEndpointsWithPaths(storeId); + } + + JsonObjectBuilder queryParams = Json.createObjectBuilder(); + queryParams.add("queryParameters", + Json.createArrayBuilder().add(Json.createObjectBuilder().add("datasetId", "{datasetId}")) + .add(Json.createObjectBuilder().add("siteUrl", "{siteUrl}")) + .add(Json.createObjectBuilder().add("datasetVersion", "{datasetVersion}")) + .add(Json.createObjectBuilder().add("dvLocale", "{localeCode}")) + .add(Json.createObjectBuilder().add("datasetPid", "{datasetPid}"))); + JsonObject substitutedParams = tokenUtil.getParams(queryParams.build()); + JsonObjectBuilder params = Json.createObjectBuilder(); + substitutedParams.keySet().forEach((key) -> { + params.add(key, substitutedParams.get(key)); + }); + params.add("managed", Boolean.toString(managed)); + if (transferEndpoint != null) { + params.add("endpoint", transferEndpoint); + } else { + params.add("referenceEndpointsWithPaths", referenceEndpointsWithPaths); + } + int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class); + JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); + String requestCallName = managed ? 
"requestGlobusTransferPaths" : "requestGlobusReferencePaths"; + allowedApiCalls.add( + Json.createObjectBuilder().add(URLTokenUtil.NAME, requestCallName).add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusUploadPaths") + .add(URLTokenUtil.TIMEOUT, timeoutSeconds)); + if(managed) { + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addGlobusFiles") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addGlobusFiles") + .add(URLTokenUtil.TIMEOUT, timeoutSeconds)); + } else { + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addFiles") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addFiles") + .add(URLTokenUtil.TIMEOUT, timeoutSeconds)); + } + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "getDatasetMetadata") + .add(URLTokenUtil.HTTP_METHOD, "GET") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}") + .add(URLTokenUtil.TIMEOUT, 5)); + allowedApiCalls.add( + Json.createObjectBuilder().add(URLTokenUtil.NAME, "getFileListing").add(URLTokenUtil.HTTP_METHOD, "GET") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}/files") + .add(URLTokenUtil.TIMEOUT, 5)); + + return ok(tokenUtil.createPostBody(params.build(), allowedApiCalls.build())); + } + + /** + * Provides specific storageIdentifiers to use for each file amd requests permissions for a given globus user to upload to the dataset + * + * @param crc + * @param datasetId + * @param jsonData - an object that must include the id of the globus "principal" involved and the "numberOfFiles" that will be transferred. 
+ * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException + */ @POST @AuthRequired - @Path("{id}/addglobusFiles") + @Path("{id}/requestGlobusUploadPaths") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, + String jsonBody) throws IOException, ExecutionException, InterruptedException { + + logger.info(" ==== (api allowGlobusUpload) jsonBody ====== " + jsonBody); + + if (!systemConfig.isGlobusUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, + BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); + } + + // ------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse e) { + return e.getResponse(); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset) + .canIssue(UpdateDatasetVersionCommand.class)) { + + JsonObject params = JsonUtil.getJsonObject(jsonBody); + if (!GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { + try { + JsonArray referencedFiles = params.getJsonArray("referencedFiles"); + if (referencedFiles == null || referencedFiles.size() == 0) { + return badRequest("No referencedFiles specified"); + } + JsonObject fileMap = globusService.requestReferenceFileIdentifiers(dataset, referencedFiles); + return (ok(fileMap)); + } catch (Exception e) { + return badRequest(e.getLocalizedMessage()); + } + } else { + try { + String principal = params.getString("principal"); + int numberOfPaths = params.getInt("numberOfFiles"); + if (numberOfPaths <= 0) { + return badRequest("numberOfFiles must be positive"); + } + + JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths); + switch (response.getInt("status")) { + case 201: + return ok(response.getJsonObject("paths")); + case 400: + return badRequest("Unable to grant permission"); + case 409: + return conflict("Permission already exists"); + default: + return error(null, "Unexpected error when granting permission"); + } + + } catch (NullPointerException | ClassCastException e) { + return badRequest("Error retrieving principal and numberOfFiles from JSON request body"); + + } + } + } else { + return forbidden("User doesn't have permission to upload to this dataset"); + } + + } + + /** A method analogous to /addFiles that must also include the taskIdentifier of the transfer-in-progress to monitor + * + * @param crc + * @param datasetId + * @param jsonData - see /addFiles documentation, aditional "taskIdentifier" key in the main object is required. 
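A hedged sketch of assembling the jsonData for the addGlobusFiles call that follows: it is the usual /addFiles payload plus the required taskIdentifier. The file-entry keys shown are illustrative assumptions; consult the /addFiles documentation for the exact fields:

    import jakarta.json.Json;
    import jakarta.json.JsonObject;

    public class AddGlobusFilesPayloadSketch {
        public static void main(String[] args) {
            // Illustrative file entry only; the real keys follow the /addFiles documentation.
            JsonObject fileEntry = Json.createObjectBuilder()
                    .add("storageIdentifier", "globus://18b3972213f-8e3d88e0ee2a")  // placeholder
                    .add("fileName", "observations.csv")                            // placeholder
                    .add("description", "Transferred via Globus")                   // placeholder
                    .build();
            JsonObject jsonData = Json.createObjectBuilder()
                    .add("taskIdentifier", "3f530302-6c48-11ee-8428-4bf023e59fd8")  // placeholder Globus task id
                    .add("files", Json.createArrayBuilder().add(fileEntry))
                    .build();
            // This object is sent as the "jsonData" part of a multipart/form-data POST to
            // /api/datasets/{id}/addGlobusFiles while the transfer is still in progress.
            System.out.println(jsonData);
        }
    }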
+ * @param uriInfo + * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException + */ + @POST + @AuthRequired + @Path("{id}/addGlobusFiles") @Consumes(MediaType.MULTIPART_FORM_DATA) public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @FormDataParam("jsonData") String jsonData, - @Context UriInfo uriInfo, - @Context HttpHeaders headers + @Context UriInfo uriInfo ) throws IOException, ExecutionException, InterruptedException { logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); @@ -3355,6 +3660,15 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } catch (WrappedResponse wr) { return wr.getResponse(); } + + JsonObject jsonObject = null; + try { + jsonObject = JsonUtil.getJsonObject(jsonData); + } catch (Exception ex) { + logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage()); + return badRequest("Error parsing json body"); + + } //------------------------------------ // (2b) Make sure dataset does not have package file @@ -3385,32 +3699,279 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } - String requestUrl = headers.getRequestHeader("origin").get(0); + String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); + + // Async Call + globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + + return ok("Async call to Globus Upload started "); - if(requestUrl.contains("localhost")){ - requestUrl = "http://localhost:8080"; + } + +/** + * Retrieve the parameters and signed URLs required to perform a globus + * transfer/download. This api endpoint is expected to be called as a signed + * callback after the globus-dataverse app/other app is launched, but it will + * accept other forms of authentication. + * + * @param crc + * @param datasetId + * @param locale + * @param downloadId - an id to a cached object listing the files involved. This is generated via Dataverse and provided to the dataverse-globus app in a signedURL. + * @return - JSON containing the parameters and URLs needed by the dataverse-globus app. The format is analogous to that for external tools. 
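The download side mirrors the upload side: the app calls back with the downloadId it received in the signed URL. A minimal sketch (placeholders as before):

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class GlobusDownloadParametersSketch {
        public static void main(String[] args) throws Exception {
            String baseUrl = "https://dataverse.example.org";            // placeholder
            String apiToken = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";    // placeholder
            String datasetId = "42";                                     // placeholder
            String downloadId = "abc123";                                // placeholder cached-object id
            HttpRequest request = HttpRequest.newBuilder(URI.create(
                    baseUrl + "/api/datasets/" + datasetId
                            + "/globusDownloadParameters?locale=en&downloadId=" + downloadId))
                    .header("X-Dataverse-key", apiToken)
                    .GET().build();
            // The response lists the files to transfer plus the signed follow-up calls
            // (requestGlobusDownload, monitorGlobusDownload) the app is allowed to make.
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            System.out.println(response.body());
        }
    }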
+ */ + @GET + @AuthRequired + @Path("{id}/globusDownloadParameters") + @Produces(MediaType.APPLICATION_JSON) + public Response getGlobusDownloadParams(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, + @QueryParam(value = "locale") String locale, @QueryParam(value = "downloadId") String downloadId) { + // ------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse e) { + return e.getResponse(); } + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; - // Async Call - globusService.globusUpload(jsonData, token, dataset, requestUrl, authUser); + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + String storeId = dataset.getEffectiveStorageDriverId(); + // acceptsGlobusTransfers should only be true for an S3 or globus store + if (!(GlobusAccessibleStore.acceptsGlobusTransfers(storeId) + || GlobusAccessibleStore.allowsGlobusReferences(storeId))) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); + } - return ok("Async call to Globus Upload started "); + JsonObject files = globusService.getFilesForDownload(downloadId); + if (files == null) { + return notFound(BundleUtil.getStringFromBundle("datasets.api.globusdownloadnotfound")); + } + URLTokenUtil tokenUtil = new URLTokenUtil(dataset, authSvc.findApiTokenByUser(authUser), locale); + + boolean managed = GlobusAccessibleStore.isDataverseManaged(storeId); + String transferEndpoint = null; + + JsonObjectBuilder queryParams = Json.createObjectBuilder(); + queryParams.add("queryParameters", + Json.createArrayBuilder().add(Json.createObjectBuilder().add("datasetId", "{datasetId}")) + .add(Json.createObjectBuilder().add("siteUrl", "{siteUrl}")) + .add(Json.createObjectBuilder().add("datasetVersion", "{datasetVersion}")) + .add(Json.createObjectBuilder().add("dvLocale", "{localeCode}")) + .add(Json.createObjectBuilder().add("datasetPid", "{datasetPid}"))); + JsonObject substitutedParams = tokenUtil.getParams(queryParams.build()); + JsonObjectBuilder params = Json.createObjectBuilder(); + substitutedParams.keySet().forEach((key) -> { + params.add(key, substitutedParams.get(key)); + }); + params.add("managed", Boolean.toString(managed)); + if (managed) { + transferEndpoint = GlobusAccessibleStore.getTransferEndpointId(storeId); + params.add("endpoint", transferEndpoint); + } + params.add("files", files); + int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class); + JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "monitorGlobusDownload") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/monitorGlobusDownload") + .add(URLTokenUtil.TIMEOUT, timeoutSeconds)); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "requestGlobusDownload") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, + "/api/v1/datasets/{datasetId}/requestGlobusDownload?downloadId=" + downloadId) + .add(URLTokenUtil.TIMEOUT, timeoutSeconds)); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "getDatasetMetadata") + .add(URLTokenUtil.HTTP_METHOD, "GET") + 
.add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}") + .add(URLTokenUtil.TIMEOUT, 5)); + allowedApiCalls.add( + Json.createObjectBuilder().add(URLTokenUtil.NAME, "getFileListing").add(URLTokenUtil.HTTP_METHOD, "GET") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}/files") + .add(URLTokenUtil.TIMEOUT, 5)); + + return ok(tokenUtil.createPostBody(params.build(), allowedApiCalls.build())); } + /** + * Requests permissions for a given globus user to download the specified files + * the dataset and returns information about the paths to transfer from. + * + * When called directly rather than in response to being given a downloadId, the jsonData can include a "fileIds" key with an array of file ids to transfer. + * + * @param crc + * @param datasetId + * @param jsonData - a JSON object that must include the id of the Globus "principal" that will be transferring the files in the case where Dataverse manages the Globus endpoint. For remote endpoints, the principal is not required. + * @return - a JSON object containing a map of file ids to Globus endpoint/path + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException + */ @POST @AuthRequired - @Path("{id}/deleteglobusRule") - @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,@FormDataParam("jsonData") String jsonData - ) throws IOException, ExecutionException, InterruptedException { + @Path("{id}/requestGlobusDownload") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response requestGlobusDownload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, + @QueryParam(value = "downloadId") String downloadId, String jsonBody) + throws IOException, ExecutionException, InterruptedException { + logger.info(" ==== (api allowGlobusDownload) jsonBody ====== " + jsonBody); - logger.info(" ==== (api deleteglobusRule) jsonData ====== " + jsonData); + if (!systemConfig.isGlobusDownload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, + BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); + } + // ------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + User user = getRequestUser(crc); - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + JsonObject body = null; + if (jsonBody != null) { + body = JsonUtil.getJsonObject(jsonBody); + } + Set fileIds = null; + if (downloadId != null) { + JsonObject files = globusService.getFilesForDownload(downloadId); + if (files != null) { + fileIds = files.keySet(); + } + } else { + if ((body!=null) && body.containsKey("fileIds")) { + Collection fileVals = body.getJsonArray("fileIds").getValuesAs(JsonValue.class); + fileIds = new HashSet(fileVals.size()); + for (JsonValue fileVal : fileVals) { + String id = null; + switch (fileVal.getValueType()) { + case STRING: + id = ((JsonString) fileVal).getString(); + break; + case NUMBER: + id = ((JsonNumber) fileVal).toString(); + break; + default: + return badRequest("fileIds must be 
numeric or string (ids/PIDs)"); + } + ; + fileIds.add(id); + } + } else { + return badRequest("fileIds JsonArray of file ids/PIDs required in POST body"); + } + } + + if (fileIds.isEmpty()) { + return notFound(BundleUtil.getStringFromBundle("datasets.api.globusdownloadnotfound")); + } + ArrayList dataFiles = new ArrayList(fileIds.size()); + for (String id : fileIds) { + boolean published = false; + logger.info("File id: " + id); + + DataFile df = null; + try { + df = findDataFileOrDie(id); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + if (!df.getOwner().equals(dataset)) { + return badRequest("All files must be in the dataset"); + } + dataFiles.add(df); + + for (FileMetadata fm : df.getFileMetadatas()) { + if (fm.getDatasetVersion().isPublished()) { + published = true; + break; + } + } + + if (!published) { + // If the file is not published, they can still download the file, if the user + // has the permission to view unpublished versions: + + if (!permissionService.hasPermissionsFor(user, df.getOwner(), + EnumSet.of(Permission.ViewUnpublishedDataset))) { + return forbidden("User doesn't have permission to download file: " + id); + } + } else { // published and restricted and/or embargoed + if (df.isRestricted() || FileUtil.isActivelyEmbargoed(df)) + // This line also handles all three authenticated session user, token user, and + // guest cases. + if (!permissionService.hasPermissionsFor(user, df, EnumSet.of(Permission.DownloadFile))) { + return forbidden("User doesn't have permission to download file: " + id); + } + + } + } + // Allowed to download all requested files + JsonObject files = GlobusUtil.getFilesMap(dataFiles, dataset); + if (GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { + // If managed, give the principal read permissions + int status = globusService.setPermissionForDownload(dataset, body.getString("principal")); + switch (status) { + case 201: + return ok(files); + case 400: + return badRequest("Unable to grant permission"); + case 409: + return conflict("Permission already exists"); + default: + return error(null, "Unexpected error when granting permission"); + } + + } + + return ok(files); + } + + /** + * Monitors a globus download and removes permissions on the dir/dataset when + * the specified transfer task is completed. + * + * @param crc + * @param datasetId + * @param jsonData - a JSON Object containing the key "taskIdentifier" with the + * Globus task to monitor. 
+ * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException + */ + @POST + @AuthRequired + @Path("{id}/monitorGlobusDownload") + @Consumes(MediaType.APPLICATION_JSON) + public Response monitorGlobusDownload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, + String jsonData) throws IOException, ExecutionException, InterruptedException { + + logger.info(" ==== (api deleteglobusRule) jsonData ====== " + jsonData); + + if (!systemConfig.isGlobusDownload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, + BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); } // ------------------------------------- @@ -3437,7 +3998,6 @@ public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathPara } - /** * Add multiple Files to an existing Dataset * @@ -3449,9 +4009,8 @@ public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathPara @AuthRequired @Path("{id}/addFiles") @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response addFilesToDataset(@Context ContainerRequestContext crc, - @PathParam("id") String idSupplied, - @FormDataParam("jsonData") String jsonData) { + public Response addFilesToDataset(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied, + @FormDataParam("jsonData") String jsonData) { if (!systemConfig.isHTTPUpload()) { return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); @@ -3659,7 +4218,7 @@ public Response getDatasetVersionArchivalStatus(@Context ContainerRequestContext headers); if (dsv.getArchivalCopyLocation() == null) { - return error(Status.NO_CONTENT, "This dataset version has not been archived"); + return error(Status.NOT_FOUND, "This dataset version has not been archived"); } else { JsonObject status = JsonUtil.getJsonObject(dsv.getArchivalCopyLocation()); return ok(status); @@ -3805,13 +4364,10 @@ public Response getExternalToolDVParams(@Context ContainerRequestContext crc, } ApiToken apiToken = null; User u = getRequestUser(crc); - if (u instanceof AuthenticatedUser) { - apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) u); - } - + apiToken = authSvc.getValidApiTokenForUser(u); - ExternalToolHandler eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale); - return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + URLTokenUtil eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()))); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -3831,7 +4387,7 @@ public Response getDatasetSummaryFieldNames() { @GET @Path("privateUrlDatasetVersion/{privateUrlToken}") - public Response getPrivateUrlDatasetVersion(@PathParam("privateUrlToken") String privateUrlToken) { + public Response getPrivateUrlDatasetVersion(@PathParam("privateUrlToken") String privateUrlToken, @QueryParam("returnOwners") boolean returnOwners) { PrivateUrlUser privateUrlUser = privateUrlService.getPrivateUrlUserFromToken(privateUrlToken); if (privateUrlUser == null) { return notFound("Private URL user not found"); @@ -3848,9 +4404,9 @@ public Response getPrivateUrlDatasetVersion(@PathParam("privateUrlToken") String JsonObjectBuilder responseJson; if (isAnonymizedAccess) { List anonymizedFieldTypeNamesList = new 
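A hedged sketch of the two download-side POST calls: requestGlobusDownload with a fileIds array (plus the principal for managed stores, as read from the request body above) and then monitorGlobusDownload with the Globus taskIdentifier; every identifier below is a placeholder:

    import jakarta.json.Json;
    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class GlobusDownloadCallsSketch {
        static final String BASE_URL = "https://dataverse.example.org";          // placeholder
        static final String API_TOKEN = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";  // placeholder
        static final String DATASET_ID = "42";                                   // placeholder

        static HttpResponse<String> post(String path, String body) throws Exception {
            HttpRequest request = HttpRequest.newBuilder(URI.create(BASE_URL + path))
                    .header("X-Dataverse-key", API_TOKEN)
                    .header("Content-Type", "application/json")
                    .POST(HttpRequest.BodyPublishers.ofString(body))
                    .build();
            return HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
        }

        public static void main(String[] args) throws Exception {
            // Ask for transfer paths for two files identified by (placeholder) database ids.
            String requestBody = Json.createObjectBuilder()
                    .add("principal", "someGlobusPrincipalId")                     // placeholder
                    .add("fileIds", Json.createArrayBuilder().add(101).add(102))   // placeholder ids
                    .build().toString();
            System.out.println(post("/api/datasets/" + DATASET_ID + "/requestGlobusDownload", requestBody).body());

            // Hand Dataverse the Globus task id so it can revoke access when the transfer ends.
            String monitorBody = Json.createObjectBuilder()
                    .add("taskIdentifier", "3f530302-6c48-11ee-8428-4bf023e59fd8") // placeholder task id
                    .build().toString();
            System.out.println(post("/api/datasets/" + DATASET_ID + "/monitorGlobusDownload", monitorBody).body());
        }
    }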
ArrayList<>(Arrays.asList(anonymizedFieldTypeNames.split(",\\s"))); - responseJson = json(dsv, anonymizedFieldTypeNamesList); + responseJson = json(dsv, anonymizedFieldTypeNamesList, true, returnOwners); } else { - responseJson = json(dsv); + responseJson = json(dsv, null, true, returnOwners); } return ok(responseJson); } @@ -3870,8 +4426,243 @@ public Response getPrivateUrlDatasetVersionCitation(@PathParam("privateUrlToken" @GET @AuthRequired @Path("{id}/versions/{versionId}/citation") - public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { return response(req -> ok( - getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getCitation(true, false)), getRequestUser(crc)); + getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned, false).getCitation(true, false)), getRequestUser(crc)); } + + @POST + @AuthRequired + @Path("{id}/versions/{versionId}/deaccession") + public Response deaccessionDataset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, String jsonBody, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + if (DS_VERSION_DRAFT.equals(versionId) || DS_VERSION_LATEST.equals(versionId)) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.deaccessionDataset.invalid.version.identifier.error", List.of(DS_VERSION_LATEST_PUBLISHED))); + } + return response(req -> { + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); + try { + JsonObject jsonObject = JsonUtil.getJsonObject(jsonBody); + datasetVersion.setVersionNote(jsonObject.getString("deaccessionReason")); + String deaccessionForwardURL = jsonObject.getString("deaccessionForwardURL", null); + if (deaccessionForwardURL != null) { + try { + datasetVersion.setArchiveNote(deaccessionForwardURL); + } catch (IllegalArgumentException iae) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.deaccessionDataset.invalid.forward.url", List.of(iae.getMessage()))); + } + } + execCommand(new DeaccessionDatasetVersionCommand(req, datasetVersion, false)); + return ok("Dataset " + datasetId + " deaccessioned for version " + versionId); + } catch (JsonParsingException jpe) { + return error(Response.Status.BAD_REQUEST, "Error parsing Json: " + jpe.getMessage()); + } + }, getRequestUser(crc)); + } + + @GET + @AuthRequired + @Path("{identifier}/guestbookEntryAtRequest") + public Response getGuestbookEntryOption(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + + Dataset dataset; + + try { + dataset = findDatasetOrDie(dvIdtf); + } catch (WrappedResponse ex) { + return error(Response.Status.NOT_FOUND, "No such dataset"); + } + String gbAtRequest = dataset.getGuestbookEntryAtRequest(); + if(gbAtRequest == null || gbAtRequest.equals(DvObjectContainer.UNDEFINED_CODE)) { + return ok("Not set on dataset, using the default: " + 
dataset.getEffectiveGuestbookEntryAtRequest()); + } + return ok(dataset.getEffectiveGuestbookEntryAtRequest()); + } + + @PUT + @AuthRequired + @Path("{identifier}/guestbookEntryAtRequest") + public Response setguestbookEntryAtRequest(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, + boolean gbAtRequest, + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + + // Superuser-only: + AuthenticatedUser user; + try { + user = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return error(Response.Status.BAD_REQUEST, "Authentication is required."); + } + if (!user.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + + Dataset dataset; + + try { + dataset = findDatasetOrDie(dvIdtf); + } catch (WrappedResponse ex) { + return error(Response.Status.NOT_FOUND, "No such dataset"); + } + Optional gbAtRequestOpt = JvmSettings.GUESTBOOK_AT_REQUEST.lookupOptional(Boolean.class); + if (!gbAtRequestOpt.isPresent()) { + return error(Response.Status.FORBIDDEN, "Guestbook Entry At Request cannot be set. This server is not configured to allow it."); + } + String choice = Boolean.valueOf(gbAtRequest).toString(); + dataset.setGuestbookEntryAtRequest(choice); + datasetService.merge(dataset); + return ok("Guestbook Entry At Request set to: " + choice); + } + + @DELETE + @AuthRequired + @Path("{identifier}/guestbookEntryAtRequest") + public Response resetGuestbookEntryAtRequest(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + + // Superuser-only: + AuthenticatedUser user; + try { + user = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return error(Response.Status.BAD_REQUEST, "Authentication is required."); + } + if (!user.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + + Dataset dataset; + + try { + dataset = findDatasetOrDie(dvIdtf); + } catch (WrappedResponse ex) { + return error(Response.Status.NOT_FOUND, "No such dataset"); + } + + dataset.setGuestbookEntryAtRequest(DvObjectContainer.UNDEFINED_CODE); + datasetService.merge(dataset); + return ok("Guestbook Entry At Request reset to default: " + dataset.getEffectiveGuestbookEntryAtRequest()); + } + + @GET + @AuthRequired + @Path("{id}/userPermissions") + public Response getUserPermissionsOnDataset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId) { + Dataset dataset; + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + User requestUser = getRequestUser(crc); + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); + jsonObjectBuilder.add("canViewUnpublishedDataset", permissionService.userOn(requestUser, dataset).has(Permission.ViewUnpublishedDataset)); + jsonObjectBuilder.add("canEditDataset", permissionService.userOn(requestUser, dataset).has(Permission.EditDataset)); + jsonObjectBuilder.add("canPublishDataset", permissionService.userOn(requestUser, dataset).has(Permission.PublishDataset)); + jsonObjectBuilder.add("canManageDatasetPermissions", permissionService.userOn(requestUser, dataset).has(Permission.ManageDatasetPermissions)); + jsonObjectBuilder.add("canDeleteDatasetDraft", permissionService.userOn(requestUser, dataset).has(Permission.DeleteDatasetDraft)); + return ok(jsonObjectBuilder); + } + + @GET + @AuthRequired + 
@Path("{id}/versions/{versionId}/canDownloadAtLeastOneFile") + public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + return response(req -> { + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned); + return ok(permissionService.canDownloadAtLeastOneFile(req, datasetVersion)); + }, getRequestUser(crc)); + } + + @GET + @AuthRequired + @Path("{identifier}/pidGenerator") + public Response getPidGenerator(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, + @Context HttpHeaders headers) throws WrappedResponse { + + Dataset dataset; + + try { + dataset = findDatasetOrDie(dvIdtf); + } catch (WrappedResponse ex) { + return error(Response.Status.NOT_FOUND, "No such dataset"); + } + String pidGeneratorId = dataset.getPidGeneratorId(); + return ok(pidGeneratorId); + } + + @PUT + @AuthRequired + @Path("{identifier}/pidGenerator") + public Response setPidGenerator(@Context ContainerRequestContext crc, @PathParam("identifier") String datasetId, + String generatorId, @Context HttpHeaders headers) throws WrappedResponse { + + // Superuser-only: + AuthenticatedUser user; + try { + user = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return error(Response.Status.UNAUTHORIZED, "Authentication is required."); + } + if (!user.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse ex) { + return error(Response.Status.NOT_FOUND, "No such dataset"); + } + if (PidUtil.getManagedProviderIds().contains(generatorId)) { + dataset.setPidGeneratorId(generatorId); + datasetService.merge(dataset); + return ok("PID Generator set to: " + generatorId); + } else { + return error(Response.Status.NOT_FOUND, "No PID Generator found for the give id"); + } + + } + + @DELETE + @AuthRequired + @Path("{identifier}/pidGenerator") + public Response resetPidGenerator(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, + @Context HttpHeaders headers) throws WrappedResponse { + + // Superuser-only: + AuthenticatedUser user; + try { + user = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return error(Response.Status.BAD_REQUEST, "Authentication is required."); + } + if (!user.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + + Dataset dataset; + + try { + dataset = findDatasetOrDie(dvIdtf); + } catch (WrappedResponse ex) { + return error(Response.Status.NOT_FOUND, "No such dataset"); + } + + dataset.setPidGenerator(null); + datasetService.merge(dataset); + return ok("Pid Generator reset to default: " + dataset.getEffectivePidGenerator().getId()); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index a60775cbd38..a1dbc3a1de6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -15,7 +15,6 @@ import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; -import 
edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; import edu.harvard.iq.dataverse.GuestbookServiceBean; import edu.harvard.iq.dataverse.MetadataBlock; @@ -41,9 +40,13 @@ import edu.harvard.iq.dataverse.engine.command.impl.CreateExplicitGroupCommand; import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand; import edu.harvard.iq.dataverse.engine.command.impl.CreateRoleCommand; +import edu.harvard.iq.dataverse.engine.command.impl.DeleteCollectionQuotaCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseLinkingDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteExplicitGroupCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetSchemaCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetCollectionQuotaCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetCollectionStorageUseCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetRootCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseStorageSizeCommand; @@ -63,11 +66,15 @@ import edu.harvard.iq.dataverse.engine.command.impl.PublishDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.RemoveRoleAssigneesFromExplicitGroupCommand; import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand; +import edu.harvard.iq.dataverse.engine.command.impl.SetCollectionQuotaCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseDefaultContributorRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseMetadataBlocksCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateExplicitGroupCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetsCommand; +import edu.harvard.iq.dataverse.engine.command.impl.ValidateDatasetJsonCommand; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -78,8 +85,9 @@ import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import static edu.harvard.iq.dataverse.util.json.JsonPrinter.brief; -import java.io.StringReader; import java.util.Collections; import java.util.LinkedList; import java.util.List; @@ -125,7 +133,6 @@ import java.util.Optional; import java.util.stream.Collectors; import jakarta.servlet.http.HttpServletResponse; -import jakarta.validation.constraints.NotNull; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.StreamingOutput; @@ -178,8 +185,8 @@ public Response addDataverse(@Context ContainerRequestContext crc, String body, Dataverse d; JsonObject dvJson; - try (StringReader rdr = new StringReader(body)) { - dvJson = Json.createReader(rdr).readObject(); + try { + dvJson = JsonUtil.getJsonObject(body); d = jsonParser().parseDataverse(dvJson); } catch (JsonParsingException jpe) { logger.log(Level.SEVERE, "Json: {0}", body); @@ -231,6 
+238,40 @@ public Response addDataverse(@Context ContainerRequestContext crc, String body, } } + + @POST + @AuthRequired + @Path("{identifier}/validateDatasetJson") + @Consumes("application/json") + public Response validateDatasetJson(@Context ContainerRequestContext crc, String body, @PathParam("identifier") String idtf) { + User u = getRequestUser(crc); + try { + String validationMessage = execCommand(new ValidateDatasetJsonCommand(createDataverseRequest(u), findDataverseOrDie(idtf), body)); + return ok(validationMessage); + } catch (WrappedResponse ex) { + Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex); + return ex.getResponse(); + } + } + + @GET + @AuthRequired + @Path("{identifier}/datasetSchema") + @Produces(MediaType.APPLICATION_JSON) + public Response getDatasetSchema(@Context ContainerRequestContext crc, @PathParam("identifier") String idtf) { + User u = getRequestUser(crc); + + try { + String datasetSchema = execCommand(new GetDatasetSchemaCommand(createDataverseRequest(u), findDataverseOrDie(idtf))); + JsonObject jsonObject = JsonUtil.getJsonObject(datasetSchema); + return Response.ok(jsonObject).build(); + } catch (WrappedResponse ex) { + Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex); + return ex.getResponse(); + } + } + + @POST @AuthRequired @@ -381,7 +422,7 @@ public Response importDataset(@Context ContainerRequestContext crc, String jsonB if (!GlobalId.verifyImportCharacters(pidParam)) { return badRequest("PID parameter contains characters that are not allowed by the Dataverse application. On import, the PID must only contain characters specified in this regex: " + BundleUtil.getStringFromBundle("pid.allowedCharacters")); } - Optional maybePid = GlobalIdServiceBean.parse(pidParam); + Optional maybePid = PidProvider.parse(pidParam); if (maybePid.isPresent()) { ds.setGlobalId(maybePid.get()); } else { @@ -456,7 +497,7 @@ public Response importDatasetDdi(@Context ContainerRequestContext crc, String xm if (!GlobalId.verifyImportCharacters(pidParam)) { return badRequest("PID parameter contains characters that are not allowed by the Dataverse application. On import, the PID must only contain characters specified in this regex: " + BundleUtil.getStringFromBundle("pid.allowedCharacters")); } - Optional maybePid = GlobalIdServiceBean.parse(pidParam); + Optional maybePid = PidProvider.parse(pidParam); if (maybePid.isPresent()) { ds.setGlobalId(maybePid.get()); } else { @@ -519,12 +560,10 @@ public Response recreateDataset(@Context ContainerRequestContext crc, String jso ds.setOwner(owner); ds = JSONLDUtil.updateDatasetMDFromJsonLD(ds, jsonLDBody, metadataBlockSvc, datasetFieldSvc, false, true, licenseSvc); //ToDo - verify PID is one Dataverse can manage (protocol/authority/shoulder match) - if(! 
- (ds.getAuthority().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Authority))&& - ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol))&& - ds.getIdentifier().startsWith(settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder)))) { - throw new BadRequestException("Cannot recreate a dataset that has a PID that doesn't match the server's settings"); - } + if (!PidUtil.getPidProvider(ds.getGlobalId().getProviderId()).canManagePID()) { + throw new BadRequestException( + "Cannot recreate a dataset that has a PID that doesn't match the server's settings"); + } if(!dvObjectSvc.isGlobalIdLocallyUnique(ds.getGlobalId())) { throw new BadRequestException("Cannot recreate a dataset whose PID is already in use"); } @@ -559,8 +598,8 @@ public Response recreateDataset(@Context ContainerRequestContext crc, String jso } private Dataset parseDataset(String datasetJson) throws WrappedResponse { - try (StringReader rdr = new StringReader(datasetJson)) { - return jsonParser().parseDataset(Json.createReader(rdr).readObject()); + try { + return jsonParser().parseDataset(JsonUtil.getJsonObject(datasetJson)); } catch (JsonParsingException | JsonParseException jpe) { logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}", datasetJson); throw new WrappedResponse(error(Status.BAD_REQUEST, "Error parsing Json: " + jpe.getMessage())); @@ -570,10 +609,11 @@ private Dataset parseDataset(String datasetJson) throws WrappedResponse { @GET @AuthRequired @Path("{identifier}") - public Response viewDataverse(@Context ContainerRequestContext crc, @PathParam("identifier") String idtf) { + public Response getDataverse(@Context ContainerRequestContext crc, @PathParam("identifier") String idtf, @QueryParam("returnOwners") boolean returnOwners) { return response(req -> ok( json(execCommand(new GetDataverseCommand(req, findDataverseOrDie(idtf))), - settingsService.isTrueForKey(SettingsServiceBean.Key.ExcludeEmailFromExport, false) + settingsService.isTrueForKey(SettingsServiceBean.Key.ExcludeEmailFromExport, false), + returnOwners )), getRequestUser(crc)); } @@ -936,7 +976,62 @@ public Response getStorageSize(@Context ContainerRequestContext crc, @PathParam( execCommand(new GetDataverseStorageSizeCommand(req, findDataverseOrDie(dvIdtf), includeCached)))), getRequestUser(crc)); } + @GET + @AuthRequired + @Path("{identifier}/storage/quota") + public Response getCollectionQuota(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf) throws WrappedResponse { + try { + Long bytesAllocated = execCommand(new GetCollectionQuotaCommand(createDataverseRequest(getRequestUser(crc)), findDataverseOrDie(dvIdtf))); + if (bytesAllocated != null) { + return ok(MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.storage.quota.allocation"),bytesAllocated)); + } + return ok(BundleUtil.getStringFromBundle("dataverse.storage.quota.notdefined")); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + + @POST + @AuthRequired + @Path("{identifier}/storage/quota/{bytesAllocated}") + public Response setCollectionQuota(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, @PathParam("bytesAllocated") Long bytesAllocated) throws WrappedResponse { + try { + execCommand(new SetCollectionQuotaCommand(createDataverseRequest(getRequestUser(crc)), findDataverseOrDie(dvIdtf), bytesAllocated)); + return ok(BundleUtil.getStringFromBundle("dataverse.storage.quota.updated")); + } catch (WrappedResponse ex) { + return 
ex.getResponse(); + } + } + + @DELETE + @AuthRequired + @Path("{identifier}/storage/quota") + public Response deleteCollectionQuota(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf) throws WrappedResponse { + try { + execCommand(new DeleteCollectionQuotaCommand(createDataverseRequest(getRequestUser(crc)), findDataverseOrDie(dvIdtf))); + return ok(BundleUtil.getStringFromBundle("dataverse.storage.quota.deleted")); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + /** + * + * @param crc + * @param identifier + * @return + * @throws edu.harvard.iq.dataverse.api.AbstractApiBean.WrappedResponse + * @todo: add an optional parameter that would force the recorded storage use + * to be recalculated (or should that be a POST version of this API?) + */ + @GET + @AuthRequired + @Path("{identifier}/storage/use") + public Response getCollectionStorageUse(@Context ContainerRequestContext crc, @PathParam("identifier") String identifier) throws WrappedResponse { + return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.storage.use"), + execCommand(new GetCollectionStorageUseCommand(req, findDataverseOrDie(identifier))))), getRequestUser(crc)); + } + @GET @AuthRequired @Path("{identifier}/roles") @@ -976,6 +1071,8 @@ public Response listAssignments(@Context ContainerRequestContext crc, @PathParam */ // File tempDir; // +// TODO: Code duplicate in ThemeWidgetFragment. Maybe extract, make static and put some place else? +// Important: at least use JvmSettings.DOCROOT_DIRECTORY and not the hardcoded location! // private void createTempDir(Dataverse editDv) { // try { // File tempRoot = java.nio.file.Files.createDirectories(Paths.get("../docroot/logos/temp")).toFile(); @@ -1171,8 +1268,9 @@ public Response getGroupByOwnerAndAliasInOwner(@Context ContainerRequestContext public Response getGuestbookResponsesByDataverse(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, @QueryParam("guestbookId") Long gbId, @Context HttpServletResponse response) { + Dataverse dv; try { - Dataverse dv = findDataverseOrDie(dvIdtf); + dv = findDataverseOrDie(dvIdtf); User u = getRequestUser(crc); DataverseRequest req = createDataverseRequest(u); if (permissionSvc.request(req) @@ -1192,16 +1290,14 @@ public Response getGuestbookResponsesByDataverse(@Context ContainerRequestContex public void write(OutputStream os) throws IOException, WebApplicationException { - Dataverse dv = dataverseService.findByAlias(dvIdtf); Map customQandAs = guestbookResponseService.mapCustomQuestionAnswersAsStrings(dv.getId(), gbId); Map datasetTitles = guestbookResponseService.mapDatasetTitles(dv.getId()); - + List guestbookResults = guestbookResponseService.getGuestbookResults(dv.getId(), gbId); os.write("Guestbook, Dataset, Dataset PID, Date, Type, File Name, File Id, File PID, User Name, Email, Institution, Position, Custom Questions\n".getBytes()); for (Object[] result : guestbookResults) { StringBuilder sb = guestbookResponseService.convertGuestbookResponsesToCSV(customQandAs, datasetTitles, result); os.write(sb.toString().getBytes()); - } } }; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index af681234e82..89b22b76a7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -103,8 +103,10 @@ public void writeTo(DownloadInstance di, 
Class clazz, Type type, Annotation[] String auxiliaryTag = null; String auxiliaryType = null; String auxiliaryFileName = null; + // Before we do anything else, check if this download can be handled // by a redirect to remote storage (only supported on S3, as of 5.4): + if (storageIO.downloadRedirectEnabled()) { // Even if the above is true, there are a few cases where a @@ -158,7 +160,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } } else if (dataFile.isTabularData()) { - // Many separate special cases here. + // Many separate special cases here. if (di.getConversionParam() != null) { if (di.getConversionParam().equals("format")) { @@ -179,12 +181,26 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] redirectSupported = false; } } - } else if (!di.getConversionParam().equals("noVarHeader")) { - // This is a subset request - can't do. + } else if (di.getConversionParam().equals("noVarHeader")) { + // This will work just fine, if the tab. file is + // stored without the var. header. Throw "unavailable" + // exception otherwise. + // @todo: should we actually drop support for this "noVarHeader" flag? + if (dataFile.getDataTable().isStoredWithVariableHeader()) { + throw new ServiceUnavailableException(); + } + // ... defaults to redirectSupported = true + } else { + // This must be a subset request then - can't do. + redirectSupported = false; + } + } else { + // "straight" download of the full tab-delimited file. + // can redirect, but only if stored with the variable + // header already added: + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { redirectSupported = false; } - } else { - redirectSupported = false; } } } @@ -206,14 +222,15 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] redirect_url_str = null; } } - - if (systemConfig.isGlobusFileDownload() && systemConfig.getGlobusStoresList() - .contains(DataAccess.getStorageDriverFromIdentifier(dataFile.getStorageIdentifier()))) { + String driverId = DataAccess.getStorageDriverFromIdentifier(dataFile.getStorageIdentifier()); + if (systemConfig.isGlobusFileDownload() && (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId))) { if (di.getConversionParam() != null) { if (di.getConversionParam().equals("format")) { if ("GlobusTransfer".equals(di.getConversionParamValue())) { - redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, dataFile); + List downloadDFList = new ArrayList(1); + downloadDFList.add(dataFile); + redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, downloadDFList); } } } @@ -245,11 +262,16 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // finally, issue the redirect: Response response = Response.seeOther(redirect_uri).build(); logger.fine("Issuing redirect to the file location."); + // Yes, this throws an exception. It's not an exception + // as in, "bummer, something went wrong". This is how a + // redirect is produced here! throw new RedirectionException(response); } throw new ServiceUnavailableException(); } + // Past this point, this is a locally served/streamed download + if (di.getConversionParam() != null) { // Image Thumbnail and Tabular data conversion: // NOTE: only supported on local files, as of 4.0.2! @@ -283,9 +305,14 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // request any tabular-specific services. 
if (di.getConversionParam().equals("noVarHeader")) { - logger.fine("tabular data with no var header requested"); - storageIO.setNoVarHeader(Boolean.TRUE); - storageIO.setVarHeader(null); + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { + logger.fine("tabular data with no var header requested"); + storageIO.setNoVarHeader(Boolean.TRUE); + storageIO.setVarHeader(null); + } else { + logger.fine("can't serve request for tabular data without varheader, since stored with it"); + throw new ServiceUnavailableException(); + } } else if (di.getConversionParam().equals("format")) { // Conversions, and downloads of "stored originals" are // now supported on all DataFiles for which StorageIO @@ -327,11 +354,10 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] if (variable.getDataTable().getDataFile().getId().equals(dataFile.getId())) { logger.fine("adding variable id " + variable.getId() + " to the list."); variablePositionIndex.add(variable.getFileOrder()); - if (subsetVariableHeader == null) { - subsetVariableHeader = variable.getName(); - } else { - subsetVariableHeader = subsetVariableHeader.concat("\t"); - subsetVariableHeader = subsetVariableHeader.concat(variable.getName()); + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { + subsetVariableHeader = subsetVariableHeader == null + ? variable.getName() + : subsetVariableHeader.concat("\t" + variable.getName()); } } else { logger.warning("variable does not belong to this data file."); @@ -344,7 +370,17 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] try { File tempSubsetFile = File.createTempFile("tempSubsetFile", ".tmp"); TabularSubsetGenerator tabularSubsetGenerator = new TabularSubsetGenerator(); - tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), tempSubsetFile.getAbsolutePath(), variablePositionIndex, dataFile.getDataTable().getCaseQuantity(), "\t"); + + long numberOfLines = dataFile.getDataTable().getCaseQuantity(); + if (dataFile.getDataTable().isStoredWithVariableHeader()) { + numberOfLines++; + } + + tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), + tempSubsetFile.getAbsolutePath(), + variablePositionIndex, + numberOfLines, + "\t"); if (tempSubsetFile.exists()) { FileInputStream subsetStream = new FileInputStream(tempSubsetFile); @@ -352,8 +388,11 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] InputStreamIO subsetStreamIO = new InputStreamIO(subsetStream, subsetSize); logger.fine("successfully created subset output stream."); - subsetVariableHeader = subsetVariableHeader.concat("\n"); - subsetStreamIO.setVarHeader(subsetVariableHeader); + + if (subsetVariableHeader != null) { + subsetVariableHeader = subsetVariableHeader.concat("\n"); + subsetStreamIO.setVarHeader(subsetVariableHeader); + } String tabularFileName = storageIO.getFileName(); @@ -378,8 +417,13 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } else { logger.fine("empty list of extra arguments."); } + // end of tab. data subset case + } else if (dataFile.getDataTable().isStoredWithVariableHeader()) { + logger.fine("tabular file stored with the var header included, no need to generate it on the fly"); + storageIO.setNoVarHeader(Boolean.TRUE); + storageIO.setVarHeader(null); } - } + } // end of tab. 
data file case if (storageIO == null) { //throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 3324523afbc..2d48322c90e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -2,19 +2,9 @@ import com.google.gson.Gson; import com.google.gson.JsonObject; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetLock; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DatasetVersionServiceBean; -import edu.harvard.iq.dataverse.DataverseRequestServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.EjbDataverseEngine; -import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.TermsOfUseAndAccessValidator; -import edu.harvard.iq.dataverse.UserNotificationServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.api.auth.AuthRequired; +import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; @@ -22,15 +12,11 @@ import edu.harvard.iq.dataverse.datasetutility.DataFileTagException; import edu.harvard.iq.dataverse.datasetutility.NoFilesException; import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams; +import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import edu.harvard.iq.dataverse.engine.command.impl.GetDataFileCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDraftFileMetadataIfAvailableCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RedetectFileTypeCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UningestFileCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.*; import edu.harvard.iq.dataverse.export.ExportService; import io.gdcc.spi.export.ExportException; import edu.harvard.iq.dataverse.externaltools.ExternalTool; @@ -44,12 +30,17 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.api.Datasets.handleVersion; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import java.io.IOException; import java.io.InputStream; +import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -58,22 +49,20 @@ import jakarta.ejb.EJB; import jakarta.ejb.EJBException; import jakarta.inject.Inject; -import jakarta.json.Json; +import jakarta.json.*; +import jakarta.json.stream.JsonParsingException; import 
jakarta.servlet.http.HttpServletResponse; -import jakarta.ws.rs.Consumes; -import jakarta.ws.rs.DELETE; -import jakarta.ws.rs.GET; -import jakarta.ws.rs.POST; -import jakarta.ws.rs.PUT; -import jakarta.ws.rs.Path; -import jakarta.ws.rs.PathParam; -import jakarta.ws.rs.QueryParam; +import jakarta.ws.rs.*; import jakarta.ws.rs.container.ContainerRequestContext; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.HttpHeaders; import jakarta.ws.rs.core.MediaType; import jakarta.ws.rs.core.Response; + +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.jsonDT; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; +import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; + import jakarta.ws.rs.core.UriInfo; import org.glassfish.jersey.media.multipart.FormDataBodyPart; import org.glassfish.jersey.media.multipart.FormDataContentDisposition; @@ -102,7 +91,11 @@ public class Files extends AbstractApiBean { SettingsServiceBean settingsService; @Inject MakeDataCountLoggingServiceBean mdcLogService; - + @Inject + GuestbookResponseServiceBean guestbookResponseService; + @Inject + DataFileServiceBean dataFileServiceBean; + private static final Logger logger = Logger.getLogger(Files.class.getName()); @@ -217,10 +210,10 @@ public Response replaceFileInDataset( // - Will skip extra attributes which includes fileToReplaceId and forceReplace optionalFileParams = new OptionalFileParams(jsonData); } catch (DataFileTagException ex) { - return error(Response.Status.BAD_REQUEST, ex.getMessage()); + return error(BAD_REQUEST, ex.getMessage()); } } catch (ClassCastException | com.google.gson.JsonParseException ex) { - return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("file.addreplace.error.parsing")); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("file.addreplace.error.parsing")); } } @@ -309,7 +302,7 @@ public Response replaceFileInDataset( //"Look at that! You added a file! (hey hey, it may have worked)"); } catch (NoFilesException ex) { Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); - return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! See administrator!"); + return error(BAD_REQUEST, "NoFileException! Serious Error! See administrator!"); } } @@ -399,7 +392,7 @@ public Response updateFileMetadata(@Context ContainerRequestContext crc, @FormDa //we get the data file to do a permissions check, if this fails it'll go to the WrappedResponse below for an ugly unpermitted error execCommand(new GetDataFileCommand(req, findDataFileOrDie(result.get(0).toString()))); - return error(Response.Status.BAD_REQUEST, "You cannot edit metadata on a dataFile that has been replaced. Please try again with the newest file id."); + return error(BAD_REQUEST, "You cannot edit metadata on a dataFile that has been replaced. 
Please try again with the newest file id."); } // (2) Check/Parse the JSON (if uploaded) @@ -421,10 +414,10 @@ public Response updateFileMetadata(@Context ContainerRequestContext crc, @FormDa // - Will skip extra attributes which includes fileToReplaceId and forceReplace optionalFileParams = new OptionalFileParams(jsonData); } catch (DataFileTagException ex) { - return error(Response.Status.BAD_REQUEST, ex.getMessage()); + return error(BAD_REQUEST, ex.getMessage()); } } catch (ClassCastException | com.google.gson.JsonParseException ex) { - return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("file.addreplace.error.parsing")); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("file.addreplace.error.parsing")); } } @@ -445,7 +438,7 @@ public Response updateFileMetadata(@Context ContainerRequestContext crc, @FormDa } if (upFmd == null){ - return error(Response.Status.BAD_REQUEST, "An error has occurred attempting to update the requested DataFile. It is not part of the current version of the Dataset."); + return error(BAD_REQUEST, "An error has occurred attempting to update the requested DataFile. It is not part of the current version of the Dataset."); } jakarta.json.JsonObject jsonObject = JsonUtil.getJsonObject(jsonData); @@ -476,7 +469,7 @@ public Response updateFileMetadata(@Context ContainerRequestContext crc, @FormDa } } catch (WrappedResponse wr) { - return error(Response.Status.BAD_REQUEST, "An error has occurred attempting to update the requested DataFile, likely related to permissions."); + return error(BAD_REQUEST, "An error has occurred attempting to update the requested DataFile, likely related to permissions."); } String jsonString = upFmd.asGsonObject(true).toString(); @@ -487,79 +480,82 @@ public Response updateFileMetadata(@Context ContainerRequestContext crc, @FormDa .type(MediaType.TEXT_PLAIN) //Our plain text string is already json .build(); } - + @GET @AuthRequired - @Path("{id}/draft") - public Response getFileDataDraft(@Context ContainerRequestContext crc, @PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { - return getFileDataResponse(getRequestUser(crc), fileIdOrPersistentId, uriInfo, headers, response, true); + @Path("{id}") + public Response getFileData(@Context ContainerRequestContext crc, + @PathParam("id") String fileIdOrPersistentId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @QueryParam("returnDatasetVersion") boolean returnDatasetVersion, + @QueryParam("returnOwners") boolean returnOwners, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + return response( req -> getFileDataResponse(req, fileIdOrPersistentId, DS_VERSION_LATEST, includeDeaccessioned, returnDatasetVersion, returnOwners, uriInfo, headers), getRequestUser(crc)); } - + @GET @AuthRequired - @Path("{id}") - public Response getFileData(@Context ContainerRequestContext crc, @PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { - return getFileDataResponse(getRequestUser(crc), fileIdOrPersistentId, uriInfo, headers, response, false); + @Path("{id}/versions/{datasetVersionId}") + public Response getFileData(@Context ContainerRequestContext crc, + @PathParam("id") String fileIdOrPersistentId, + @PathParam("datasetVersionId") String datasetVersionId, + @QueryParam("includeDeaccessioned") boolean 
includeDeaccessioned, + @QueryParam("returnDatasetVersion") boolean returnDatasetVersion, + @QueryParam("returnOwners") boolean returnOwners, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + return response( req -> getFileDataResponse(req, fileIdOrPersistentId, datasetVersionId, includeDeaccessioned, returnDatasetVersion, returnOwners, uriInfo, headers), getRequestUser(crc)); } - - private Response getFileDataResponse(User user, String fileIdOrPersistentId, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response, boolean draft ){ - - DataverseRequest req; - try { - req = createDataverseRequest(user); - } catch (Exception e) { - return error(BAD_REQUEST, "Error attempting to request information. Maybe a bad API token?"); - } - final DataFile df; - try { - df = execCommand(new GetDataFileCommand(req, findDataFileOrDie(fileIdOrPersistentId))); - } catch (Exception e) { - return error(BAD_REQUEST, "Error attempting get the requested data file."); - } - FileMetadata fm; + private Response getFileDataResponse(final DataverseRequest req, + String fileIdOrPersistentId, + String datasetVersionId, + boolean includeDeaccessioned, + boolean returnDatasetVersion, + boolean returnOwners, + UriInfo uriInfo, + HttpHeaders headers) throws WrappedResponse { + final DataFile dataFile = execCommand(new GetDataFileCommand(req, findDataFileOrDie(fileIdOrPersistentId))); + FileMetadata fileMetadata = execCommand(handleVersion(datasetVersionId, new Datasets.DsVersionHandler<>() { + @Override + public Command handleLatest() { + return new GetLatestAccessibleFileMetadataCommand(req, dataFile, includeDeaccessioned); + } - if (draft) { - try { - fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); - } catch (WrappedResponse w) { - return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); + @Override + public Command handleDraft() { + return new GetDraftFileMetadataIfAvailableCommand(req, dataFile); } - if (null == fm) { - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); + + @Override + public Command handleSpecific(long major, long minor) { + return new GetSpecificPublishedFileMetadataByDatasetVersionCommand(req, dataFile, major, minor, includeDeaccessioned); } - } else { - //first get latest published - //if not available get draft if permissible - try { - fm = df.getLatestPublishedFileMetadata(); - - } catch (UnsupportedOperationException e) { - try { - fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); - } catch (WrappedResponse w) { - return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); - } - if (null == fm) { - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); - } + @Override + public Command handleLatestPublished() { + return new GetLatestPublishedFileMetadataCommand(req, dataFile, includeDeaccessioned); } + })); + if (fileMetadata == null) { + throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("files.api.notFoundInVersion", Arrays.asList(fileIdOrPersistentId, datasetVersionId)))); } - - if (fm.getDatasetVersion().isReleased()) { - MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountLoggingServiceBean.MakeDataCountEntry(uriInfo, headers, dvRequestService, df); + + if (fileMetadata.getDatasetVersion().isReleased()) { + MakeDataCountLoggingServiceBean.MakeDataCountEntry 
entry = new MakeDataCountLoggingServiceBean.MakeDataCountEntry(uriInfo, headers, dvRequestService, dataFile); mdcLogService.logEntry(entry); } - + return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) - .add("data", json(fm)).build()) + .add("data", json(fileMetadata, returnOwners, returnDatasetVersion)).build()) .type(MediaType.APPLICATION_JSON) .build(); } + @GET @AuthRequired @Path("{id}/metadata") @@ -624,23 +620,41 @@ public Response uningestDatafile(@Context ContainerRequestContext crc, @PathPara if (dataFile == null) { return error(Response.Status.NOT_FOUND, "File not found for given id."); } - if (!dataFile.isTabularData()) { - return error(Response.Status.BAD_REQUEST, "Cannot uningest non-tabular file."); - } - - try { - DataverseRequest req = createDataverseRequest(getRequestUser(crc)); - execCommand(new UningestFileCommand(req, dataFile)); - Long dataFileId = dataFile.getId(); - dataFile = fileService.find(dataFileId); - Dataset theDataset = dataFile.getOwner(); - exportDatasetMetadata(settingsService, theDataset); - return ok("Datafile " + dataFileId + " uningested."); - } catch (WrappedResponse wr) { - return wr.getResponse(); + // Ingest never succeeded, either there was a failure or this is not a tabular + // data file + // We allow anyone who can publish to uningest in order to clear a problem + if (dataFile.isIngestProblem()) { + try { + AuthenticatedUser au = getRequestAuthenticatedUserOrDie(crc); + if (!(permissionSvc.permissionsFor(au, dataFile).contains(Permission.PublishDataset))) { + return forbidden( + "Uningesting to remove an ingest problem can only be done by those who can publish the dataset"); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + dataFile.setIngestDone(); + dataFile.setIngestReport(null); + fileService.save(dataFile); + return ok("Datafile " + dataFile.getId() + " uningested."); + } else { + return error(BAD_REQUEST, + BundleUtil.getStringFromBundle("Cannot uningest non-tabular file.")); + } + } else { + try { + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); + execCommand(new UningestFileCommand(req, dataFile)); + Long dataFileId = dataFile.getId(); + dataFile = fileService.find(dataFileId); + Dataset theDataset = dataFile.getOwner(); + exportDatasetMetadata(settingsService, theDataset); + return ok("Datafile " + dataFileId + " uningested."); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } } - } // reingest attempts to queue an *existing* DataFile @@ -659,7 +673,7 @@ public Response reingest(@Context ContainerRequestContext crc, @PathParam("id") try { u = getRequestAuthenticatedUserOrDie(crc); if (!u.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "This API call can be used by superusers only"); + return error(FORBIDDEN, "This API call can be used by superusers only"); } } catch (WrappedResponse wr) { return wr.getResponse(); @@ -675,21 +689,21 @@ public Response reingest(@Context ContainerRequestContext crc, @PathParam("id") Dataset dataset = dataFile.getOwner(); if (dataset == null) { - return error(Response.Status.BAD_REQUEST, "Failed to locate the parent dataset for the datafile."); + return error(BAD_REQUEST, "Failed to locate the parent dataset for the datafile."); } if (dataFile.isTabularData()) { - return error(Response.Status.BAD_REQUEST, "The datafile is already ingested as Tabular."); + return error(BAD_REQUEST, "The datafile is already ingested as Tabular."); } boolean ingestLock = dataset.isLockedFor(DatasetLock.Reason.Ingest); if 
(ingestLock) { - return error(Response.Status.FORBIDDEN, "Dataset already locked with an Ingest lock"); + return error(FORBIDDEN, "Dataset already locked with an Ingest lock"); } if (!FileUtil.canIngestAsTabular(dataFile)) { - return error(Response.Status.BAD_REQUEST, "Tabular ingest is not supported for this file type (id: "+id+", type: "+dataFile.getContentType()+")"); + return error(BAD_REQUEST, "Tabular ingest is not supported for this file type (id: "+id+", type: "+dataFile.getContentType()+")"); } dataFile.SetIngestScheduled(); @@ -726,6 +740,11 @@ public Response reingest(@Context ContainerRequestContext crc, @PathParam("id") public Response redetectDatafile(@Context ContainerRequestContext crc, @PathParam("id") String id, @QueryParam("dryRun") boolean dryRun) { try { DataFile dataFileIn = findDataFileOrDie(id); + // Ingested Files have mimetype = text/tab-separated-values + // No need to redetect + if (dataFileIn.isTabularData()) { + return error(BAD_REQUEST, "The file is an ingested tabular file."); + } String originalContentType = dataFileIn.getContentType(); DataFile dataFileOut = execCommand(new RedetectFileTypeCommand(createDataverseRequest(getRequestUser(crc)), dataFileIn, dryRun)); NullSafeJsonBuilder result = NullSafeJsonBuilder.jsonObjectBuilder() @@ -747,7 +766,7 @@ public Response extractNcml(@Context ContainerRequestContext crc, @PathParam("id if (!au.isSuperuser()) { // We can always make a command in the future if there's a need // for non-superusers to call this API. - return error(Response.Status.FORBIDDEN, "This API call can be used by superusers only"); + return error(FORBIDDEN, "This API call can be used by superusers only"); } DataFile dataFileIn = findDataFileOrDie(id); java.nio.file.Path tempLocationPath = null; @@ -798,19 +817,17 @@ public Response getExternalToolFMParams(@Context ContainerRequestContext crc, @P return error(BAD_REQUEST, "External tool does not have file scope."); } ApiToken apiToken = null; - User u = getRequestUser(crc); - if (u instanceof AuthenticatedUser) { - apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) u); - } + User user = getRequestUser(crc); + apiToken = authSvc.getValidApiTokenForUser(user); FileMetadata target = fileSvc.findFileMetadata(fmid); if (target == null) { return error(BAD_REQUEST, "FileMetadata not found."); } - ExternalToolHandler eth = null; + URLTokenUtil eth = null; eth = new ExternalToolHandler(externalTool, target.getDataFile(), apiToken, target, locale); - return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()))); } @GET @@ -818,4 +835,134 @@ public Response getExternalToolFMParams(@Context ContainerRequestContext crc, @P public Response getFixityAlgorithm() { return ok(systemConfig.getFileFixityChecksumAlgorithm().toString()); } + + @GET + @AuthRequired + @Path("{id}/downloadCount") + public Response getFileDownloadCount(@Context ContainerRequestContext crc, @PathParam("id") String dataFileId) { + return response(req -> { + DataFile dataFile = execCommand(new GetDataFileCommand(req, findDataFileOrDie(dataFileId))); + return ok(guestbookResponseService.getDownloadCountByDataFileId(dataFile.getId()).toString()); + }, getRequestUser(crc)); + } + + @GET + @AuthRequired + @Path("{id}/dataTables") + public Response getFileDataTables(@Context ContainerRequestContext crc, @PathParam("id") String 
dataFileId) { + DataFile dataFile; + try { + dataFile = findDataFileOrDie(dataFileId); + } catch (WrappedResponse e) { + return notFound("File not found for given id."); + } + if (dataFile.isRestricted() || FileUtil.isActivelyEmbargoed(dataFile)) { + DataverseRequest dataverseRequest = createDataverseRequest(getRequestUser(crc)); + boolean hasPermissionToDownloadFile = permissionSvc.requestOn(dataverseRequest, dataFile).has(Permission.DownloadFile); + if (!hasPermissionToDownloadFile) { + return forbidden("Insufficient permissions to access the requested information."); + } + } + if (!dataFile.isTabularData()) { + return badRequest(BundleUtil.getStringFromBundle("files.api.only.tabular.supported")); + } + return ok(jsonDT(dataFile.getDataTables())); + } + + @POST + @AuthRequired + @Path("{id}/metadata/categories") + @Produces(MediaType.APPLICATION_JSON) + public Response setFileCategories(@Context ContainerRequestContext crc, @PathParam("id") String dataFileId, String jsonBody) { + return response(req -> { + DataFile dataFile = execCommand(new GetDataFileCommand(req, findDataFileOrDie(dataFileId))); + jakarta.json.JsonObject jsonObject; + try (StringReader stringReader = new StringReader(jsonBody)) { + jsonObject = Json.createReader(stringReader).readObject(); + JsonArray requestedCategoriesJson = jsonObject.getJsonArray("categories"); + FileMetadata fileMetadata = dataFile.getFileMetadata(); + for (JsonValue jsonValue : requestedCategoriesJson) { + JsonString jsonString = (JsonString) jsonValue; + fileMetadata.addCategoryByName(jsonString.getString()); + } + execCommand(new UpdateDatasetVersionCommand(fileMetadata.getDataFile().getOwner(), req)); + return ok("Categories of file " + dataFileId + " updated."); + } catch (JsonParsingException jpe) { + return badRequest("Error parsing Json: " + jpe.getMessage()); + } + }, getRequestUser(crc)); + } + + @POST + @AuthRequired + @Path("{id}/metadata/tabularTags") + @Produces(MediaType.APPLICATION_JSON) + public Response setFileTabularTags(@Context ContainerRequestContext crc, @PathParam("id") String dataFileId, String jsonBody) { + return response(req -> { + DataFile dataFile = execCommand(new GetDataFileCommand(req, findDataFileOrDie(dataFileId))); + if (!dataFile.isTabularData()) { + return badRequest(BundleUtil.getStringFromBundle("files.api.only.tabular.supported")); + } + jakarta.json.JsonObject jsonObject; + try (StringReader stringReader = new StringReader(jsonBody)) { + jsonObject = Json.createReader(stringReader).readObject(); + JsonArray requestedTabularTagsJson = jsonObject.getJsonArray("tabularTags"); + for (JsonValue jsonValue : requestedTabularTagsJson) { + JsonString jsonString = (JsonString) jsonValue; + try { + dataFile.addUniqueTagByLabel(jsonString.getString()); + } catch (IllegalArgumentException iax){ + return badRequest(iax.getMessage()); + } + } + execCommand(new UpdateDatasetVersionCommand(dataFile.getOwner(), req)); + return ok("Tabular tags of file " + dataFileId + " updated."); + } catch (JsonParsingException jpe) { + return badRequest("Error parsing Json: " + jpe.getMessage()); + } + }, getRequestUser(crc)); + } + + @GET + @AuthRequired + @Path("{id}/hasBeenDeleted") + public Response getHasBeenDeleted(@Context ContainerRequestContext crc, @PathParam("id") String dataFileId) { + return response(req -> { + DataFile dataFile = execCommand(new GetDataFileCommand(req, findDataFileOrDie(dataFileId))); + return ok(dataFileServiceBean.hasBeenDeleted(dataFile)); + }, getRequestUser(crc)); + } + + /** + * @param 
fileIdOrPersistentId Database ID or PID of the data file. + * @param versionNumber The version of the dataset, such as 1.0, :draft, + * :latest-published, etc. + * @param includeDeaccessioned Defaults to false. + */ + @GET + @AuthRequired + @Path("{id}/versions/{dsVersionString}/citation") + public Response getFileCitationByVersion(@Context ContainerRequestContext crc, @PathParam("id") String fileIdOrPersistentId, @PathParam("dsVersionString") String versionNumber, @QueryParam("includeDeaccessioned") boolean includeDeaccessioned) { + try { + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); + final DataFile df = execCommand(new GetDataFileCommand(req, findDataFileOrDie(fileIdOrPersistentId))); + Dataset ds = df.getOwner(); + DatasetVersion dsv = findDatasetVersionOrDie(req, versionNumber, ds, includeDeaccessioned, true); + if (dsv == null) { + return unauthorized(BundleUtil.getStringFromBundle("files.api.no.draftOrUnauth")); + } + + Long getDatasetVersionID = dsv.getId(); + FileMetadata fm = dataFileServiceBean.findFileMetadataByDatasetVersionIdAndDataFileId(getDatasetVersionID, df.getId()); + if (fm == null) { + return notFound(BundleUtil.getStringFromBundle("files.api.fileNotFound")); + } + boolean direct = df.isIdentifierRegistered(); + DataCitation citation = new DataCitation(fm, direct); + return ok(citation.toString(true)); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java index d7eec9f5757..dfc9f48dd1a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java @@ -16,6 +16,7 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import jakarta.json.JsonObjectBuilder; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import java.io.IOException; @@ -164,8 +165,8 @@ public Response createHarvestingClient(@Context ContainerRequestContext crc, Str return wr.getResponse(); } - try ( StringReader rdr = new StringReader(jsonBody) ) { - JsonObject json = Json.createReader(rdr).readObject(); + try { + JsonObject json = JsonUtil.getJsonObject(jsonBody); // Check that the client with this name doesn't exist yet: // (we could simply let the command fail, but that does not result @@ -261,9 +262,9 @@ public Response modifyHarvestingClient(@Context ContainerRequestContext crc, Str String ownerDataverseAlias = harvestingClient.getDataverse().getAlias(); - try ( StringReader rdr = new StringReader(jsonBody) ) { + try { DataverseRequest req = createDataverseRequest(getRequestUser(crc)); - JsonObject json = Json.createReader(rdr).readObject(); + JsonObject json = JsonUtil.getJsonObject(jsonBody); HarvestingClient newHarvestingClient = new HarvestingClient(); String newDataverseAlias = jsonParser().parseHarvestingClient(json, newHarvestingClient); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Index.java b/src/main/java/edu/harvard/iq/dataverse/api/Index.java index 4910c460b6a..c30a77acb58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Index.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Index.java @@ -215,7 +215,7 @@ public Response clearSolrIndex() { return error(Status.INTERNAL_SERVER_ERROR, 
ex.getLocalizedMessage()); } } - + @GET @Path("{type}/{id}") public Response indexTypeById(@PathParam("type") String type, @PathParam("id") Long id) { @@ -326,6 +326,29 @@ public Response indexDatasetByPersistentId(@QueryParam("persistentId") String pe } } + /** + * Clears the entry for a dataset from Solr + * + * @param id numeric id of the dataset + * @return response; + * will return 404 if no such dataset in the database; but will attempt to + * clear the entry from Solr regardless. + */ + @DELETE + @Path("datasets/{id}") + public Response clearDatasetFromIndex(@PathParam("id") Long id) { + Dataset dataset = datasetService.find(id); + // We'll attempt to delete the Solr document regardless of whether the + // dataset exists in the database: + String response = indexService.removeSolrDocFromIndex(IndexServiceBean.solrDocIdentifierDataset + id); + if (dataset != null) { + return ok("Sent request to clear Solr document for dataset " + id + ": " + response); + } else { + return notFound("Could not find dataset " + id + " in the database. Requested to clear from Solr anyway: " + response); + } + } + + + /** + * This is just a demo of the modular math logic we use for indexAll. */ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Info.java b/src/main/java/edu/harvard/iq/dataverse/api/Info.java index 3349c34dfcc..40ce6cd25b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Info.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Info.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse.api; -import edu.harvard.iq.dataverse.api.auth.AuthRequired; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -9,8 +8,6 @@ import jakarta.json.JsonValue; import jakarta.ws.rs.GET; import jakarta.ws.rs.Path; -import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.Response; @Path("info") @@ -25,44 +22,58 @@ public class Info extends AbstractApiBean { @GET @Path("settings/:DatasetPublishPopupCustomText") public Response getDatasetPublishPopupCustomText() { - String setting = settingsService.getValueForKey(SettingsServiceBean.Key.DatasetPublishPopupCustomText); - if (setting != null) { - return ok(Json.createObjectBuilder().add("message", setting)); - } else { - return notFound("Setting " + SettingsServiceBean.Key.DatasetPublishPopupCustomText + " not found"); - } + return getSettingResponseByKey(SettingsServiceBean.Key.DatasetPublishPopupCustomText); } - + + @GET + @Path("settings/:MaxEmbargoDurationInMonths") + public Response getMaxEmbargoDurationInMonths() { + return getSettingResponseByKey(SettingsServiceBean.Key.MaxEmbargoDurationInMonths); + } + @GET - @AuthRequired @Path("version") - public Response getInfo(@Context ContainerRequestContext crc) { + public Response getInfo() { String versionStr = systemConfig.getVersion(true); String[] comps = versionStr.split("build",2); String version = comps[0].trim(); JsonValue build = comps.length > 1 ? 
Json.createArrayBuilder().add(comps[1].trim()).build().get(0) : JsonValue.NULL; - - return response( req -> ok( Json.createObjectBuilder().add("version", version) - .add("build", build)), getRequestUser(crc)); + return ok(Json.createObjectBuilder() + .add("version", version) + .add("build", build)); } - + @GET - @AuthRequired @Path("server") - public Response getServer(@Context ContainerRequestContext crc) { - return response( req -> ok(JvmSettings.FQDN.lookup()), getRequestUser(crc)); + public Response getServer() { + return ok(JvmSettings.FQDN.lookup()); } - + @GET - @AuthRequired @Path("apiTermsOfUse") - public Response getTermsOfUse(@Context ContainerRequestContext crc) { - return response( req -> ok(systemConfig.getApiTermsOfUse()), getRequestUser(crc)); + public Response getTermsOfUse() { + return ok(systemConfig.getApiTermsOfUse()); } - + @GET @Path("settings/incompleteMetadataViaApi") public Response getAllowsIncompleteMetadata() { return ok(JvmSettings.API_ALLOW_INCOMPLETE_METADATA.lookupOptional(Boolean.class).orElse(false)); } + + @GET + @Path("zipDownloadLimit") + public Response getZipDownloadLimit() { + long zipDownloadLimit = SystemConfig.getLongLimitFromStringOrDefault(settingsSvc.getValueForKey(SettingsServiceBean.Key.ZipDownloadLimit), SystemConfig.defaultZipDownloadLimit); + return ok(zipDownloadLimit); + } + + private Response getSettingResponseByKey(SettingsServiceBean.Key key) { + String setting = settingsService.getValueForKey(key); + if (setting != null) { + return ok(Json.createObjectBuilder().add("message", setting)); + } else { + return notFound("Setting " + key + " not found"); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/LDNInbox.java b/src/main/java/edu/harvard/iq/dataverse/api/LDNInbox.java index 05d12f1083c..6a9c608dc13 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/LDNInbox.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/LDNInbox.java @@ -1,12 +1,9 @@ package edu.harvard.iq.dataverse.api; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DataverseRoleServiceBean; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.HandlenetServiceBean; import edu.harvard.iq.dataverse.MailServiceBean; import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; import edu.harvard.iq.dataverse.RoleAssignment; @@ -15,6 +12,9 @@ import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonLDNamespace; @@ -134,13 +134,13 @@ public Response acceptMessage(String body) { .getString("@id"); if (citedResource.getString("@type").equals(JsonLDTerm.schemaOrg("Dataset").getUrl())) { logger.fine("Raw PID: " + pid); - if (pid.startsWith(DOIServiceBean.DOI_RESOLVER_URL)) { - pid = pid.replace(DOIServiceBean.DOI_RESOLVER_URL, DOIServiceBean.DOI_PROTOCOL + ":"); - } else if (pid.startsWith(HandlenetServiceBean.HDL_RESOLVER_URL)) { - pid = 
pid.replace(HandlenetServiceBean.HDL_RESOLVER_URL, HandlenetServiceBean.HDL_PROTOCOL + ":"); + if (pid.startsWith(AbstractDOIProvider.DOI_RESOLVER_URL)) { + pid = pid.replace(AbstractDOIProvider.DOI_RESOLVER_URL, AbstractDOIProvider.DOI_PROTOCOL + ":"); + } else if (pid.startsWith(HandlePidProvider.HDL_RESOLVER_URL)) { + pid = pid.replace(HandlePidProvider.HDL_RESOLVER_URL, HandlePidProvider.HDL_PROTOCOL + ":"); } logger.fine("Protocol PID: " + pid); - Optional id = GlobalIdServiceBean.parse(pid); + Optional id = PidProvider.parse(pid); Dataset dataset = datasetSvc.findByGlobalId(pid); if (dataset != null) { JsonObject citingResource = Json.createObjectBuilder().add("@id", citingPID) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java index 6b48dbf8415..08e776a3eb8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java @@ -2,17 +2,21 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; +import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitations; import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitationsServiceBean; import edu.harvard.iq.dataverse.makedatacount.DatasetMetrics; import edu.harvard.iq.dataverse.makedatacount.DatasetMetricsServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteDOIProvider; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; -import java.io.FileReader; import java.io.IOException; +import java.io.InputStream; import java.net.HttpURLConnection; -import java.net.MalformedURLException; import java.net.URL; import java.util.Iterator; import java.util.List; @@ -83,26 +87,21 @@ public Response sendDataToHub() { @Path("{id}/addUsageMetricsFromSushiReport") public Response addUsageMetricsFromSushiReport(@PathParam("id") String id, @QueryParam("reportOnDisk") String reportOnDisk) { - JsonObject report; - - try (FileReader reader = new FileReader(reportOnDisk)) { - report = Json.createReader(reader).readObject(); - Dataset dataset; - try { - dataset = findDatasetOrDie(id); - List datasetMetrics = datasetMetricsService.parseSushiReport(report, dataset); - if (!datasetMetrics.isEmpty()) { - for (DatasetMetrics dm : datasetMetrics) { - datasetMetricsService.save(dm); - } + try { + JsonObject report = JsonUtil.getJsonObjectFromFile(reportOnDisk); + Dataset dataset = findDatasetOrDie(id); + List datasetMetrics = datasetMetricsService.parseSushiReport(report, dataset); + if (!datasetMetrics.isEmpty()) { + for (DatasetMetrics dm : datasetMetrics) { + datasetMetricsService.save(dm); } - } catch (WrappedResponse ex) { - Logger.getLogger(MakeDataCountApi.class.getName()).log(Level.SEVERE, null, ex); - return error(Status.BAD_REQUEST, "Wrapped response: " + ex.getLocalizedMessage()); } + } catch (WrappedResponse ex) { + logger.log(Level.SEVERE, null, ex); + return error(Status.BAD_REQUEST, "Wrapped response: " + ex.getLocalizedMessage()); } catch (IOException ex) { - System.out.print(ex.getMessage()); + logger.log(Level.WARNING, ex.getMessage()); return error(Status.BAD_REQUEST, "IOException: " + ex.getLocalizedMessage()); } String msg = "Dummy Data has 
been added to dataset " + id; @@ -113,10 +112,8 @@ public Response addUsageMetricsFromSushiReport(@PathParam("id") String id, @Quer @Path("/addUsageMetricsFromSushiReport") public Response addUsageMetricsFromSushiReportAll(@PathParam("id") String id, @QueryParam("reportOnDisk") String reportOnDisk) { - JsonObject report; - - try (FileReader reader = new FileReader(reportOnDisk)) { - report = Json.createReader(reader).readObject(); + try { + JsonObject report = JsonUtil.getJsonObjectFromFile(reportOnDisk); List datasetMetrics = datasetMetricsService.parseSushiReport(report, null); if (!datasetMetrics.isEmpty()) { @@ -126,7 +123,7 @@ public Response addUsageMetricsFromSushiReportAll(@PathParam("id") String id, @Q } } catch (IOException ex) { - System.out.print(ex.getMessage()); + logger.log(Level.WARNING, ex.getMessage()); return error(Status.BAD_REQUEST, "IOException: " + ex.getLocalizedMessage()); } String msg = "Usage Metrics Data has been added to all datasets from file " + reportOnDisk; @@ -135,11 +132,17 @@ public Response addUsageMetricsFromSushiReportAll(@PathParam("id") String id, @Q @POST @Path("{id}/updateCitationsForDataset") - public Response updateCitationsForDataset(@PathParam("id") String id) throws MalformedURLException, IOException { + public Response updateCitationsForDataset(@PathParam("id") String id) throws IOException { try { Dataset dataset = findDatasetOrDie(id); - String persistentId = dataset.getGlobalId().toString(); - //ToDo - if this isn't a DOI? + GlobalId pid = dataset.getGlobalId(); + PidProvider pidProvider = PidUtil.getPidProvider(pid.getProviderId()); + // Only supported for DOIs and for DataCite DOI providers + if(!DataCiteDOIProvider.TYPE.equals(pidProvider.getProviderType())) { + return error(Status.BAD_REQUEST, "Only DataCite DOI providers are supported"); + } + String persistentId = pid.toString(); + // DataCite wants "doi=", not "doi:". 
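// Editorial sketch (illustrative only, not part of this patch): the provider check above and the rewrite on
// the next line, condensed into a hypothetical helper. The DOI value in the comment is made up; the point is
// that DataCite's citation query takes the bare DOI ("doi=10.xxxx/yyyy"), not the "doi:" protocol form.
static String toDataCiteDoiQueryValue(String persistentId) {
    if (!persistentId.startsWith("doi:")) {
        throw new IllegalArgumentException("Only DOI-style PIDs are supported: " + persistentId);
    }
    // e.g. "doi:10.5072/FK2/EXAMPLE" (hypothetical value) becomes "10.5072/FK2/EXAMPLE"
    return persistentId.replaceFirst("doi:", "");
}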
String authorityPlusIdentifier = persistentId.replaceFirst("doi:", ""); // Request max page size and then loop to handle multiple pages @@ -158,7 +161,10 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws Mal logger.warning("Failed to get citations from " + url.toString()); return error(Status.fromStatusCode(status), "Failed to get citations from " + url.toString()); } - JsonObject report = Json.createReader(connection.getInputStream()).readObject(); + JsonObject report; + try (InputStream inStream = connection.getInputStream()) { + report = JsonUtil.getJsonObject(inStream); + } JsonObject links = report.getJsonObject("links"); JsonArray data = report.getJsonArray("data"); Iterator iter = data.iterator(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java index 7bb2570334b..452e5df9f9a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java @@ -547,6 +547,98 @@ public Response getDownloadsPastDays(@Context UriInfo uriInfo, @PathParam("days" return ok(jsonObj); } + /** Accounts */ + + @GET + @Path("accounts") + public Response getAccountsAllTime(@Context UriInfo uriInfo) { + return getAccountsToMonth(uriInfo, MetricsUtil.getCurrentMonth()); + } + + @GET + @Path("accounts/toMonth/{yyyymm}") + public Response getAccountsToMonth(@Context UriInfo uriInfo, @PathParam("yyyymm") String yyyymm) { + + try { + errorIfUnrecongizedQueryParamPassed(uriInfo, new String[] { }); + } catch (IllegalArgumentException ia) { + return error(BAD_REQUEST, ia.getLocalizedMessage()); + } + + String metricName = "accountsToMonth"; + String sanitizedyyyymm = MetricsUtil.sanitizeYearMonthUserInput(yyyymm); + JsonObject jsonObj = MetricsUtil.stringToJsonObject(metricsSvc.returnUnexpiredCacheMonthly(metricName, sanitizedyyyymm, null, null)); + + if (null == jsonObj) { // run query and save + Long count; + try { + count = metricsSvc.accountsToMonth(sanitizedyyyymm); + } catch (ParseException e) { + return error(BAD_REQUEST, "Unable to parse supplied date: " + e.getLocalizedMessage()); + } + jsonObj = MetricsUtil.countToJson(count).build(); + metricsSvc.save(new Metric(metricName, sanitizedyyyymm, null, null, jsonObj.toString())); + } + + return ok(jsonObj); + } + + @GET + @Path("accounts/pastDays/{days}") + public Response getAccountsPastDays(@Context UriInfo uriInfo, @PathParam("days") int days) { + + try { + errorIfUnrecongizedQueryParamPassed(uriInfo, new String[] { }); + } catch (IllegalArgumentException ia) { + return error(BAD_REQUEST, ia.getLocalizedMessage()); + } + + String metricName = "accountsPastDays"; + + if (days < 1) { + return error(BAD_REQUEST, "Invalid parameter for number of days."); + } + + JsonObject jsonObj = MetricsUtil.stringToJsonObject(metricsSvc.returnUnexpiredCacheDayBased(metricName, String.valueOf(days), null, null)); + + if (null == jsonObj) { // run query and save + Long count = metricsSvc.accountsPastDays(days); + jsonObj = MetricsUtil.countToJson(count).build(); + metricsSvc.save(new Metric(metricName, String.valueOf(days), null, null, jsonObj.toString())); + } + + return ok(jsonObj); + } + + @GET + @Path("accounts/monthly") + @Produces("text/csv, application/json") + public Response getAccountsTimeSeries(@Context Request req, @Context UriInfo uriInfo) { + + try { + errorIfUnrecongizedQueryParamPassed(uriInfo, new String[] { }); + } catch (IllegalArgumentException ia) { + return error(BAD_REQUEST, 
ia.getLocalizedMessage()); + } + + String metricName = "accounts"; + JsonArray jsonArray = MetricsUtil.stringToJsonArray(metricsSvc.returnUnexpiredCacheAllTime(metricName, null, null)); + + if (null == jsonArray) { // run query and save + // Only handling published right now + jsonArray = metricsSvc.accountsTimeSeries(); + metricsSvc.save(new Metric(metricName, null, null, null, jsonArray.toString())); + } + + MediaType requestedType = getVariant(req, MediaType.valueOf(FileUtil.MIME_TYPE_CSV), MediaType.APPLICATION_JSON_TYPE); + if ((requestedType != null) && (requestedType.equals(MediaType.APPLICATION_JSON_TYPE))) { + return ok(jsonArray); + } + return ok(FileUtil.jsonArrayOfObjectsToCSV(jsonArray, MetricsUtil.DATE, MetricsUtil.COUNT), MediaType.valueOf(FileUtil.MIME_TYPE_CSV), "accounts.timeseries.csv"); + } + + /** MakeDataCount */ + @GET @Path("makeDataCount/{metric}") public Response getMakeDataCountMetricCurrentMonth(@Context UriInfo uriInfo, @PathParam("metric") String metricSupplied, @QueryParam("country") String country, @QueryParam("parentAlias") String parentAlias) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Notifications.java b/src/main/java/edu/harvard/iq/dataverse/api/Notifications.java index 37c894d3071..df172f36973 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Notifications.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Notifications.java @@ -55,7 +55,6 @@ public Response getAllNotificationsForUser(@Context ContainerRequestContext crc) notificationObjectBuilder.add("id", notification.getId()); notificationObjectBuilder.add("type", type.toString()); /* FIXME - Re-add reasons for return if/when they are added to the notifications page. - if (Type.RETURNEDDS.equals(type) || Type.SUBMITTEDDS.equals(type)) { JsonArrayBuilder reasons = getReasonsForReturn(notification); for (JsonValue reason : reasons.build()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Pids.java b/src/main/java/edu/harvard/iq/dataverse/api/Pids.java index 534e42fd505..4ad57bceb58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Pids.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Pids.java @@ -130,4 +130,41 @@ public Response deletePid(@Context ContainerRequestContext crc, @PathParam("id") } } + @GET + @AuthRequired + @Path("providers") + @Produces(MediaType.APPLICATION_JSON) + public Response getPidProviders(@Context ContainerRequestContext crc) throws WrappedResponse { + try { + getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + return ok(PidUtil.getProviders()); + } + + @GET + @AuthRequired + // The :.+ suffix allows PIDs with a / char to be entered w/o escaping + @Path("providers/{persistentId:.+}") + @Produces(MediaType.APPLICATION_JSON) + public Response getPidProviderId(@Context ContainerRequestContext crc, @PathParam("persistentId") String persistentId) throws WrappedResponse { + try { + getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + GlobalId globalId = PidUtil.parseAsGlobalID(persistentId); + if(globalId== null) { + return error(Response.Status.NOT_FOUND, "No provider found for PID"); + } else { + String providerId = globalId.getProviderId(); + if(PidUtil.getManagedProviderIds().contains(providerId)) { + return ok(globalId.getProviderId()); + } else { + return ok("PID recognized as an unmanaged " + globalId.getProtocol()); + } + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Prov.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Prov.java index 37b4792920f..7f81ca20988 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Prov.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Prov.java @@ -12,7 +12,8 @@ import edu.harvard.iq.dataverse.engine.command.impl.PersistProvJsonCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.util.BundleUtil; -import java.io.StringReader; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.util.HashMap; import java.util.logging.Logger; import jakarta.inject.Inject; @@ -109,11 +110,10 @@ public Response addProvFreeForm(@Context ContainerRequestContext crc, String bod if(!systemConfig.isProvCollectionEnabled()) { return error(FORBIDDEN, BundleUtil.getStringFromBundle("api.prov.error.provDisabled")); } - StringReader rdr = new StringReader(body); JsonObject jsonObj = null; try { - jsonObj = Json.createReader(rdr).readObject(); + jsonObj = JsonUtil.getJsonObject(body); } catch (JsonException ex) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("api.prov.error.freeformInvalidJson")); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Search.java b/src/main/java/edu/harvard/iq/dataverse/api/Search.java index c760534ca7b..71e2865ca4d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Search.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Search.java @@ -157,7 +157,9 @@ public Response search( numResultsPerPage, true, //SEK get query entities always for search API additional Dataset Information 6300 12/6/2019 geoPoint, - geoRadius + geoRadius, + showFacets, // facets are expensive, no need to ask for them if not requested + showRelevance // no need for highlights unless requested either ); } catch (SearchException ex) { Throwable cause = ex; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java index 87be1f14e05..b9db44b2671 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java @@ -44,6 +44,34 @@ public Response getExternalToolsforFile(@PathParam("id") String idSupplied, @Que return wr.getResponse(); } } + + @GET + @Path("datasets/{id}/externalTool/{toolId}") + public Response getExternalToolforDatasetById(@PathParam("id") String idSupplied, @PathParam("toolId") String toolId, @QueryParam("type") String typeSupplied) { + ExternalTool.Type type; + try { + type = ExternalTool.Type.fromString(typeSupplied); + } catch (IllegalArgumentException ex) { + return error(BAD_REQUEST, ex.getLocalizedMessage()); + } + Dataset dataset; + try { + dataset = findDatasetOrDie(idSupplied); + JsonArrayBuilder tools = Json.createArrayBuilder(); + List datasetTools = externalToolService.findDatasetToolsByType(type); + for (ExternalTool tool : datasetTools) { + ApiToken apiToken = externalToolService.getApiToken(getRequestApiKey()); + ExternalToolHandler externalToolHandler = new ExternalToolHandler(tool, dataset, apiToken, null); + JsonObjectBuilder toolToJson = externalToolService.getToolAsJsonWithQueryParameters(externalToolHandler); + if (tool.getId().toString().equals(toolId)) { + return ok(toolToJson); + } + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + return error(BAD_REQUEST, "Could not find external tool with id of " + toolId); + } @Path("files/{id}/externalTools") @GET @@ -71,5 +99,31 @@ public Response getExternalToolsForFile(@PathParam("id") String idSupplied, @Que return 
wr.getResponse(); } } + + @Path("files/{id}/externalTool/{toolId}") + @GET + public Response getExternalToolForFileById(@PathParam("id") String idSupplied, @QueryParam("type") String typeSupplied, @PathParam("toolId") String toolId) { + ExternalTool.Type type; + try { + type = ExternalTool.Type.fromString(typeSupplied); + } catch (IllegalArgumentException ex) { + return error(BAD_REQUEST, ex.getLocalizedMessage()); + } + try { + DataFile dataFile = findDataFileOrDie(idSupplied); + List datasetTools = externalToolService.findFileToolsByTypeAndContentType(type, dataFile.getContentType()); + for (ExternalTool tool : datasetTools) { + ApiToken apiToken = externalToolService.getApiToken(getRequestApiKey()); + ExternalToolHandler externalToolHandler = new ExternalToolHandler(tool, dataFile, apiToken, dataFile.getFileMetadata(), null); + JsonObjectBuilder toolToJson = externalToolService.getToolAsJsonWithQueryParameters(externalToolHandler); + if (externalToolService.meetsRequirements(tool, dataFile) && tool.getId().toString().equals(toolId)) { + return ok(toolToJson); + } + } + return error(BAD_REQUEST, "Could not find external tool with id of " + toolId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java index 05ba150df8e..add43ea2091 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java @@ -100,7 +100,7 @@ public String datafile(@QueryParam("fileName") String fileName, @QueryParam("fil TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { output = output.concat("Caught an exception trying to ingest file " + fileName + ": " + ingestEx.getLocalizedMessage()); return output; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/BearerTokenAuthMechanism.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/BearerTokenAuthMechanism.java index b5a48427fa5..31f524af3f0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/BearerTokenAuthMechanism.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/BearerTokenAuthMechanism.java @@ -5,7 +5,6 @@ import edu.harvard.iq.dataverse.UserServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.UserRecordIdentifier; -import edu.harvard.iq.dataverse.authorization.providers.oauth2.OAuth2Exception; import edu.harvard.iq.dataverse.authorization.providers.oauth2.oidc.OIDCAuthProvider; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; @@ -23,6 +22,8 @@ public class BearerTokenAuthMechanism implements AuthMechanism { private static final String BEARER_AUTH_SCHEME = "Bearer"; + private static final Logger logger = Logger.getLogger(BearerTokenAuthMechanism.class.getCanonicalName()); + public static final String UNAUTHORIZED_BEARER_TOKEN = "Unauthorized bearer token"; public static final String INVALID_BEARER_TOKEN = "Could not parse bearer token"; public static final String BEARER_TOKEN_DETECTED_NO_OIDC_PROVIDER_CONFIGURED = "Bearer token detected, no OIDC provider configured"; @@ -31,18 +32,19 @@ public class BearerTokenAuthMechanism implements AuthMechanism { protected AuthenticationServiceBean authSvc; @Inject 
protected UserServiceBean userSvc; - private static final Logger logger = Logger.getLogger(BearerTokenAuthMechanism.class.getCanonicalName()); + @Override public User findUserFromRequest(ContainerRequestContext containerRequestContext) throws WrappedAuthErrorResponse { if (FeatureFlags.API_BEARER_AUTH.enabled()) { Optional bearerToken = getRequestApiKey(containerRequestContext); // No Bearer Token present, hence no user can be authenticated - if (!bearerToken.isPresent()) { + if (bearerToken.isEmpty()) { return null; } + // Validate and verify provided Bearer Token, and retrieve UserRecordIdentifier // TODO: Get the identifier from an invalidating cache to avoid lookup bursts of the same token. Tokens in the cache should be removed after some (configurable) time. - UserRecordIdentifier userInfo = verifyOidcBearerTokenAndGetUserIndentifier(bearerToken.get()); + UserRecordIdentifier userInfo = verifyOidcBearerTokenAndGetUserIdentifier(bearerToken.get()); // retrieve Authenticated User from AuthService AuthenticatedUser authUser = authSvc.lookupUser(userInfo); @@ -67,7 +69,7 @@ public User findUserFromRequest(ContainerRequestContext containerRequestContext) * @param token The string containing the encoded JWT * @return */ - private UserRecordIdentifier verifyOidcBearerTokenAndGetUserIndentifier(String token) throws WrappedAuthErrorResponse { + private UserRecordIdentifier verifyOidcBearerTokenAndGetUserIdentifier(String token) throws WrappedAuthErrorResponse { try { BearerAccessToken accessToken = BearerAccessToken.parse(token); // Get list of all authentication providers using Open ID Connect @@ -85,12 +87,15 @@ private UserRecordIdentifier verifyOidcBearerTokenAndGetUserIndentifier(String t for (OIDCAuthProvider provider : providers) { try { // The OIDCAuthProvider need to verify a Bearer Token and equip the client means to identify the corresponding AuthenticatedUser. - Optional userInfo = provider.getUserIdentifierForValidToken(accessToken); + Optional userInfo = provider.getUserIdentifier(accessToken); if(userInfo.isPresent()) { logger.log(Level.FINE, "Bearer token detected, provider {0} confirmed validity and provided identifier", provider.getId()); return userInfo.get(); } - } catch ( IOException| OAuth2Exception e) { + } catch (IOException e) { + // TODO: Just logging this is not sufficient - if there is an IO error with the one provider + // which would have validated successfully, this is not the user's fault. We need to + // take note and refer to it later when it occurs.
logger.log(Level.FINE, "Bearer token detected, provider " + provider.getId() + " indicates an invalid Token, skipping", e); } } @@ -108,7 +113,7 @@ private UserRecordIdentifier verifyOidcBearerTokenAndGetUserIndentifier(String t * Retrieve the raw, encoded token value from the Authorization Bearer HTTP header as defined in RFC 6750 * @return An {@link Optional} either empty if not present or the raw token from the header */ - private Optional getRequestApiKey(ContainerRequestContext containerRequestContext) { + private Optional getRequestApiKey(ContainerRequestContext containerRequestContext) { String headerParamApiKey = containerRequestContext.getHeaderString(HttpHeaders.AUTHORIZATION); if (headerParamApiKey != null && headerParamApiKey.toLowerCase().startsWith(BEARER_AUTH_SCHEME.toLowerCase() + " ")) { return Optional.of(headerParamApiKey); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java index f8572144236..258661f6495 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java @@ -3,7 +3,10 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.UrlSignerUtil; @@ -27,16 +30,18 @@ public class SignedUrlAuthMechanism implements AuthMechanism { @Inject protected AuthenticationServiceBean authSvc; - + @Inject + protected PrivateUrlServiceBean privateUrlSvc; + @Override public User findUserFromRequest(ContainerRequestContext containerRequestContext) throws WrappedAuthErrorResponse { String signedUrlRequestParameter = getSignedUrlRequestParameter(containerRequestContext); if (signedUrlRequestParameter == null) { return null; } - AuthenticatedUser authUser = getAuthenticatedUserFromSignedUrl(containerRequestContext); - if (authUser != null) { - return authUser; + User user = getAuthenticatedUserFromSignedUrl(containerRequestContext); + if (user != null) { + return user; } throw new WrappedAuthErrorResponse(RESPONSE_MESSAGE_BAD_SIGNED_URL); } @@ -45,8 +50,8 @@ private String getSignedUrlRequestParameter(ContainerRequestContext containerReq return containerRequestContext.getUriInfo().getQueryParameters().getFirst(SIGNED_URL_TOKEN); } - private AuthenticatedUser getAuthenticatedUserFromSignedUrl(ContainerRequestContext containerRequestContext) { - AuthenticatedUser authUser = null; + private User getAuthenticatedUserFromSignedUrl(ContainerRequestContext containerRequestContext) { + User user = null; // The signedUrl contains a param telling which user this is supposed to be for. // We don't trust this. So we lookup that user, and get their API key, and use // that as a secret in validating the signedURL. If the signature can't be @@ -54,17 +59,26 @@ private AuthenticatedUser getAuthenticatedUserFromSignedUrl(ContainerRequestCont // we reject the request. 
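// Editorial sketch (illustrative only, not part of this patch): the validation step performed below. The
// signing key is recomposed on the server from the configured API signing secret plus the API token of the
// user named in the URL, so a forged "user" parameter cannot yield a matching signature. The helper and its
// parameter names are hypothetical; UrlSignerUtil.isValidUrl is the same call used in the code that follows.
static boolean wouldAcceptSignedUrl(String requestUrl, String claimedUserId, String httpMethod,
                                    String configuredSigningSecret, String apiTokenOfClaimedUser) {
    String signingKey = configuredSigningSecret + apiTokenOfClaimedUser;
    // Recomputes the signature over the URL, user id and HTTP method and compares it to the one in the URL.
    return UrlSignerUtil.isValidUrl(requestUrl, claimedUserId, httpMethod, signingKey);
}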
UriInfo uriInfo = containerRequestContext.getUriInfo(); String userId = uriInfo.getQueryParameters().getFirst(SIGNED_URL_USER); - AuthenticatedUser targetUser = authSvc.getAuthenticatedUser(userId); - ApiToken userApiToken = authSvc.findApiTokenByUser(targetUser); + User targetUser = null; + ApiToken userApiToken = null; + if (!userId.startsWith(PrivateUrlUser.PREFIX)) { + targetUser = authSvc.getAuthenticatedUser(userId); + userApiToken = authSvc.findApiTokenByUser((AuthenticatedUser) targetUser); + } else { + PrivateUrl privateUrl = privateUrlSvc.getPrivateUrlFromDatasetId(Long.parseLong(userId.substring(PrivateUrlUser.PREFIX.length()))); + userApiToken = new ApiToken(); + userApiToken.setTokenString(privateUrl.getToken()); + targetUser = privateUrlSvc.getPrivateUrlUserFromToken(privateUrl.getToken()); + } if (targetUser != null && userApiToken != null) { String signedUrl = URLDecoder.decode(uriInfo.getRequestUri().toString(), StandardCharsets.UTF_8); String requestMethod = containerRequestContext.getMethod(); String signedUrlSigningKey = JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + userApiToken.getTokenString(); boolean isSignedUrlValid = UrlSignerUtil.isValidUrl(signedUrl, userId, requestMethod, signedUrlSigningKey); if (isSignedUrlValid) { - authUser = targetUser; + user = targetUser; } } - return authUser; + return user; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java index 5bc50903be8..a81848bd7af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.PermissionServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -14,6 +15,7 @@ import edu.harvard.iq.dataverse.api.imports.ImportGenericServiceBean; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.ConstraintViolationUtil; import java.util.logging.Level; @@ -44,6 +46,8 @@ public class CollectionDepositManagerImpl implements CollectionDepositManager { @EJB DatasetServiceBean datasetService; @EJB + DvObjectServiceBean dvObjectService; + @EJB PermissionServiceBean permissionService; @Inject SwordAuth swordAuth; @@ -96,13 +100,10 @@ public DepositReceipt createNew(String collectionUri, Deposit deposit, AuthCrede Dataset dataset = new Dataset(); dataset.setOwner(dvThatWillOwnDataset); - String nonNullDefaultIfKeyNotFound = ""; - String protocol = settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - String authority = settingsService.getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - - dataset.setProtocol(protocol); - dataset.setAuthority(authority); - //Wait until the create command before actually getting an identifier + PidProvider pidProvider = 
dvObjectService.getEffectivePidGenerator(dataset); + dataset.setProtocol(pidProvider.getProtocol()); + dataset.setAuthority(pidProvider.getAuthority()); + //Wait until the create command before actually getting an identifier logger.log(Level.FINE, "DS Deposit identifier: {0}", dataset.getIdentifier()); AbstractCreateDatasetCommand createDatasetCommand = new CreateNewDatasetCommand(dataset, dvReq); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionListManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionListManagerImpl.java index 084136f2b5d..541fa144e80 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionListManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionListManagerImpl.java @@ -94,7 +94,7 @@ public Feed listCollectionContents(IRI iri, AuthCredentials authCredentials, Swo throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Could not find dataverse: " + dvAlias); } } else { - throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Couldn't determine target type or identifer from URL: " + iri); + throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Couldn't determine target type or identifier from URL: " + iri); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java index 93b7dc96563..3f5345d8e0d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java @@ -6,17 +6,21 @@ import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DataverseRequestServiceBean; import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.PermissionServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; +import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDataFilesCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.ConstraintViolationUtil; -import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -66,6 +70,8 @@ public class MediaResourceManagerImpl implements MediaResourceManager { SwordAuth swordAuth; @Inject UrlManager urlManager; + @Inject + DataverseRequestServiceBean dvRequestService; private HttpServletRequest httpRequest; @@ -213,7 +219,7 @@ public void deleteMediaResource(String uri, AuthCredentials authCredentials, Swo throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Unsupported file type found in URL: " + uri); } } else { - throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Target or identifer not specified in URL: " + uri); + throw new 
SwordError(UriRegistry.ERROR_BAD_REQUEST, "Target or identifier not specified in URL: " + uri); } } @@ -298,37 +304,42 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au */ String guessContentTypeForMe = null; List dataFiles = new ArrayList<>(); + try { - try { - CreateDataFileResult createDataFilesResponse = FileUtil.createDataFiles(editVersion, deposit.getInputStream(), uploadedZipFilename, guessContentTypeForMe, null, null, systemConfig); - dataFiles = createDataFilesResponse.getDataFiles(); - } catch (EJBException ex) { - Throwable cause = ex.getCause(); - if (cause != null) { - if (cause instanceof IllegalArgumentException) { - /** - * @todo should be safe to remove this catch of - * EJBException and IllegalArgumentException once - * this ticket is resolved: - * - * IllegalArgumentException: MALFORMED when - * uploading certain zip files - * https://github.com/IQSS/dataverse/issues/1021 - */ - throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Exception caught calling ingestService.createDataFiles. Problem with zip file, perhaps: " + cause); - } else { - throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Exception caught calling ingestService.createDataFiles: " + cause); - } + //CreateDataFileResult createDataFilesResponse = FileUtil.createDataFiles(editVersion, deposit.getInputStream(), uploadedZipFilename, guessContentTypeForMe, null, null, systemConfig); + UploadSessionQuotaLimit quota = null; + if (systemConfig.isStorageQuotasEnforced()) { + quota = dataFileService.getUploadSessionQuotaLimit(dataset); + } + Command cmd = new CreateNewDataFilesCommand(dvReq, editVersion, deposit.getInputStream(), uploadedZipFilename, guessContentTypeForMe, null, quota, null); + CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd); + dataFiles = createDataFilesResult.getDataFiles(); + } catch (CommandException ex) { + Throwable cause = ex.getCause(); + if (cause != null) { + if (cause instanceof IllegalArgumentException) { + /** + * @todo should be safe to remove this catch of + * EJBException and IllegalArgumentException once this + * ticket is resolved: + * + * IllegalArgumentException: MALFORMED when uploading + * certain zip files + * https://github.com/IQSS/dataverse/issues/1021 + */ + throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Unable to add file(s) to dataset. Problem with zip file, perhaps: " + cause); } else { - throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Exception caught calling ingestService.createDataFiles. No cause: " + ex.getMessage()); + throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Unable to add file(s) to dataset: " + cause); } - } /*TODO: L.A. 4.6! catch (FileExceedsMaxSizeException ex) { + } else { + throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Unable to add file(s) to dataset: " + ex.getMessage()); + } + } + /*TODO: L.A. 4.6! 
catch (FileExceedsMaxSizeException ex) { throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Exception caught calling ingestService.createDataFiles: " + ex.getMessage()); //Logger.getLogger(MediaResourceManagerImpl.class.getName()).log(Level.SEVERE, null, ex); - }*/ - } catch (IOException ex) { - throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Unable to add file(s) to dataset: " + ex.getMessage()); - } + }*/ + if (!dataFiles.isEmpty()) { Set constraintViolations = editVersion.validate(); if (constraintViolations.size() > 0) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index 8f7934dd528..85d4868605d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -828,9 +828,9 @@ private HashSet processGeoBndBox(XMLStreamReader xmlr) throws XMLStrea } else if (xmlr.getLocalName().equals("eastBL")) { addToSet(set,"eastLongitude", parseText(xmlr)); } else if (xmlr.getLocalName().equals("southBL")) { - addToSet(set,"southLongitude", parseText(xmlr)); + addToSet(set,"southLatitude", parseText(xmlr)); } else if (xmlr.getLocalName().equals("northBL")) { - addToSet(set,"northLongitude", parseText(xmlr)); + addToSet(set,"northLatitude", parseText(xmlr)); } } else if (event == XMLStreamConstants.END_ELEMENT) { if (xmlr.getLocalName().equals("geoBndBox")) break; @@ -1338,6 +1338,7 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th List> producers = new ArrayList<>(); List> grants = new ArrayList<>(); List> software = new ArrayList<>(); + List prodPlac = new ArrayList<>(); for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { if (event == XMLStreamConstants.START_ELEMENT) { @@ -1353,9 +1354,7 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th } else if (xmlr.getLocalName().equals("prodDate")) { citation.getFields().add(FieldDTO.createPrimitiveFieldDTO("productionDate", parseDate(xmlr, "prodDate"))); } else if (xmlr.getLocalName().equals("prodPlac")) { - List prodPlac = new ArrayList<>(); - prodPlac.add(parseText(xmlr, "prodPlac")); - citation.getFields().add(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.productionPlace, prodPlac)); + prodPlac.add(parseText(xmlr)); } else if (xmlr.getLocalName().equals("software")) { HashSet set = new HashSet<>(); addToSet(set,"softwareVersion", xmlr.getAttributeValue(null, "version")); @@ -1388,6 +1387,9 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th if (producers.size()>0) { citation.getFields().add(FieldDTO.createMultipleCompoundFieldDTO("producer", producers)); } + if (prodPlac.size() > 0) { + citation.getFields().add(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.productionPlace, prodPlac)); + } return; } } @@ -1397,6 +1399,7 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th private void processTitlStmt(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws XMLStreamException, ImportException { MetadataBlockDTO citation = datasetDTO.getDatasetVersion().getMetadataBlocks().get("citation"); List> otherIds = new ArrayList<>(); + List altTitles = new ArrayList<>(); for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { if (event == XMLStreamConstants.START_ELEMENT) { @@ -1407,8 +1410,7 @@ 
private void processTitlStmt(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws FieldDTO field = FieldDTO.createPrimitiveFieldDTO("subtitle", parseText(xmlr)); citation.getFields().add(field); } else if (xmlr.getLocalName().equals("altTitl")) { - FieldDTO field = FieldDTO.createPrimitiveFieldDTO("alternativeTitle", parseText(xmlr)); - citation.getFields().add(field); + altTitles.add(parseText(xmlr)); } else if (xmlr.getLocalName().equals("IDNo")) { if ( AGENCY_HANDLE.equals( xmlr.getAttributeValue(null, "agency") ) || AGENCY_DOI.equals( xmlr.getAttributeValue(null, "agency") ) ) { importGenericService.reassignIdentifierAsGlobalId(parseText(xmlr), datasetDTO); @@ -1436,6 +1438,10 @@ private void processTitlStmt(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws if (otherIds.size()>0) { citation.addField(FieldDTO.createMultipleCompoundFieldDTO("otherId", otherIds)); } + if (!altTitles.isEmpty()) { + FieldDTO field = FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.alternativeTitle, altTitles); + citation.getFields().add(field); + } return; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index f7a6cf54dd5..6068ec45e4f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -2,7 +2,6 @@ import com.google.gson.Gson; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; import edu.harvard.iq.dataverse.DatasetFieldConstant; @@ -11,13 +10,14 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.ForeignMetadataFieldMapping; import edu.harvard.iq.dataverse.ForeignMetadataFormatMapping; -import edu.harvard.iq.dataverse.HandlenetServiceBean; import edu.harvard.iq.dataverse.MetadataBlockServiceBean; import edu.harvard.iq.dataverse.api.dto.*; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; import edu.harvard.iq.dataverse.license.LicenseServiceBean; -import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; @@ -352,7 +352,7 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { if (!otherIds.isEmpty()) { // We prefer doi or hdl identifiers like "doi:10.7910/DVN/1HE30F" for (String otherId : otherIds) { - if (otherId.startsWith(DOIServiceBean.DOI_PROTOCOL) || otherId.startsWith(HandlenetServiceBean.HDL_PROTOCOL) || otherId.startsWith(DOIServiceBean.DOI_RESOLVER_URL) || otherId.startsWith(HandlenetServiceBean.HDL_RESOLVER_URL) || otherId.startsWith(DOIServiceBean.HTTP_DOI_RESOLVER_URL) || otherId.startsWith(HandlenetServiceBean.HTTP_HDL_RESOLVER_URL) || otherId.startsWith(DOIServiceBean.DXDOI_RESOLVER_URL) || otherId.startsWith(DOIServiceBean.HTTP_DXDOI_RESOLVER_URL)) { + if (otherId.startsWith(AbstractDOIProvider.DOI_PROTOCOL) || otherId.startsWith(HandlePidProvider.HDL_PROTOCOL) || 
otherId.startsWith(AbstractDOIProvider.DOI_RESOLVER_URL) || otherId.startsWith(HandlePidProvider.HDL_RESOLVER_URL) || otherId.startsWith(AbstractDOIProvider.HTTP_DOI_RESOLVER_URL) || otherId.startsWith(HandlePidProvider.HTTP_HDL_RESOLVER_URL) || otherId.startsWith(AbstractDOIProvider.DXDOI_RESOLVER_URL) || otherId.startsWith(AbstractDOIProvider.HTTP_DXDOI_RESOLVER_URL)) { return otherId; } } @@ -361,7 +361,7 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { try { HandleResolver hr = new HandleResolver(); hr.resolveHandle(otherId); - return HandlenetServiceBean.HDL_PROTOCOL + ":" + otherId; + return HandlePidProvider.HDL_PROTOCOL + ":" + otherId; } catch (HandleException e) { logger.fine("Not a valid handle: " + e.toString()); } @@ -388,7 +388,7 @@ public String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO d String protocol = identifierString.substring(0, index1); - if (DOIServiceBean.DOI_PROTOCOL.equals(protocol) || HandlenetServiceBean.HDL_PROTOCOL.equals(protocol) || PermaLinkPidProviderServiceBean.PERMA_PROTOCOL.equals(protocol)) { + if (AbstractDOIProvider.DOI_PROTOCOL.equals(protocol) || HandlePidProvider.HDL_PROTOCOL.equals(protocol) || PermaLinkPidProvider.PERMA_PROTOCOL.equals(protocol)) { logger.fine("Processing hdl:- or doi:- or perma:-style identifier : "+identifierString); } else if ("http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol)) { @@ -396,21 +396,21 @@ public String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO d // We also recognize global identifiers formatted as global resolver URLs: //ToDo - refactor index1 always has -1 here so that we can use index1+1 later //ToDo - single map of protocol/url, are all three cases the same then? - if (identifierString.startsWith(HandlenetServiceBean.HDL_RESOLVER_URL) || identifierString.startsWith(HandlenetServiceBean.HTTP_HDL_RESOLVER_URL)) { + if (identifierString.startsWith(HandlePidProvider.HDL_RESOLVER_URL) || identifierString.startsWith(HandlePidProvider.HTTP_HDL_RESOLVER_URL)) { logger.fine("Processing Handle identifier formatted as a resolver URL: "+identifierString); - protocol = HandlenetServiceBean.HDL_PROTOCOL; - index1 = (identifierString.startsWith(HandlenetServiceBean.HDL_RESOLVER_URL)) ? HandlenetServiceBean.HDL_RESOLVER_URL.length() - 1 : HandlenetServiceBean.HTTP_HDL_RESOLVER_URL.length() - 1; + protocol = HandlePidProvider.HDL_PROTOCOL; + index1 = (identifierString.startsWith(HandlePidProvider.HDL_RESOLVER_URL)) ? 
HandlePidProvider.HDL_RESOLVER_URL.length() - 1 : HandlePidProvider.HTTP_HDL_RESOLVER_URL.length() - 1; index2 = identifierString.indexOf("/", index1 + 1); - } else if (identifierString.startsWith(DOIServiceBean.DOI_RESOLVER_URL) || identifierString.startsWith(DOIServiceBean.HTTP_DOI_RESOLVER_URL) || identifierString.startsWith(DOIServiceBean.DXDOI_RESOLVER_URL) || identifierString.startsWith(DOIServiceBean.HTTP_DXDOI_RESOLVER_URL)) { + } else if (identifierString.startsWith(AbstractDOIProvider.DOI_RESOLVER_URL) || identifierString.startsWith(AbstractDOIProvider.HTTP_DOI_RESOLVER_URL) || identifierString.startsWith(AbstractDOIProvider.DXDOI_RESOLVER_URL) || identifierString.startsWith(AbstractDOIProvider.HTTP_DXDOI_RESOLVER_URL)) { logger.fine("Processing DOI identifier formatted as a resolver URL: "+identifierString); - protocol = DOIServiceBean.DOI_PROTOCOL; - identifierString = identifierString.replace(DOIServiceBean.DXDOI_RESOLVER_URL, DOIServiceBean.DOI_RESOLVER_URL); - identifierString = identifierString.replace(DOIServiceBean.HTTP_DXDOI_RESOLVER_URL, DOIServiceBean.HTTP_DOI_RESOLVER_URL); - index1 = (identifierString.startsWith(DOIServiceBean.DOI_RESOLVER_URL)) ? DOIServiceBean.DOI_RESOLVER_URL.length() - 1 : DOIServiceBean.HTTP_DOI_RESOLVER_URL.length() - 1; + protocol = AbstractDOIProvider.DOI_PROTOCOL; + identifierString = identifierString.replace(AbstractDOIProvider.DXDOI_RESOLVER_URL, AbstractDOIProvider.DOI_RESOLVER_URL); + identifierString = identifierString.replace(AbstractDOIProvider.HTTP_DXDOI_RESOLVER_URL, AbstractDOIProvider.HTTP_DOI_RESOLVER_URL); + index1 = (identifierString.startsWith(AbstractDOIProvider.DOI_RESOLVER_URL)) ? AbstractDOIProvider.DOI_RESOLVER_URL.length() - 1 : AbstractDOIProvider.HTTP_DOI_RESOLVER_URL.length() - 1; index2 = identifierString.indexOf("/", index1 + 1); - } else if (identifierString.startsWith(PermaLinkPidProviderServiceBean.PERMA_RESOLVER_URL + Dataset.TARGET_URL)) { - protocol = PermaLinkPidProviderServiceBean.PERMA_PROTOCOL; - index1 = PermaLinkPidProviderServiceBean.PERMA_RESOLVER_URL.length() + + Dataset.TARGET_URL.length() - 1; + } else if (identifierString.startsWith(PermaLinkPidProvider.PERMA_RESOLVER_URL + Dataset.TARGET_URL)) { + protocol = PermaLinkPidProvider.PERMA_PROTOCOL; + index1 = PermaLinkPidProvider.PERMA_RESOLVER_URL.length() + + Dataset.TARGET_URL.length() - 1; index2 = identifierString.indexOf("/", index1 + 1); } else { logger.warning("HTTP Url in supplied as the identifier is neither a Handle nor DOI resolver: "+identifierString); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index bcb67b180c8..39977190691 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -36,12 +36,14 @@ import edu.harvard.iq.dataverse.util.ConstraintViolationUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; import edu.harvard.iq.dataverse.util.json.JsonParser; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.license.LicenseServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; + import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; -import java.io.StringReader; import java.nio.file.Files; import java.util.ArrayList; import java.util.Date; @@ -60,7 +62,6 @@ import jakarta.json.Json; import 
jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; -import jakarta.json.JsonReader; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; import jakarta.validation.ConstraintViolation; @@ -259,9 +260,8 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve throw new ImportException("Failed to transform XML metadata format "+metadataFormat+" into a DatasetDTO"); } } - - JsonReader jsonReader = Json.createReader(new StringReader(json)); - JsonObject obj = jsonReader.readObject(); + + JsonObject obj = JsonUtil.getJsonObject(json); //and call parse Json to read it into a dataset try { JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService, licenseService, harvestingClient); @@ -396,10 +396,8 @@ public JsonObject ddiToJson(String xmlToParse) throws ImportException, XMLStream // convert DTO to Json, Gson gson = new GsonBuilder().setPrettyPrinting().create(); String json = gson.toJson(dsDTO); - JsonReader jsonReader = Json.createReader(new StringReader(json)); - JsonObject obj = jsonReader.readObject(); - return obj; + return JsonUtil.getJsonObject(json); } public JsonObjectBuilder doImport(DataverseRequest dataverseRequest, Dataverse owner, String xmlToParse, String fileName, ImportType importType, PrintWriter cleanupLog) throws ImportException, IOException { @@ -416,8 +414,7 @@ public JsonObjectBuilder doImport(DataverseRequest dataverseRequest, Dataverse o // convert DTO to Json, Gson gson = new GsonBuilder().setPrettyPrinting().create(); String json = gson.toJson(dsDTO); - JsonReader jsonReader = Json.createReader(new StringReader(json)); - JsonObject obj = jsonReader.readObject(); + JsonObject obj = JsonUtil.getJsonObject(json); //and call parse Json to read it into a dataset try { JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService, licenseService); @@ -426,8 +423,9 @@ public JsonObjectBuilder doImport(DataverseRequest dataverseRequest, Dataverse o // For ImportType.NEW, if the user supplies a global identifier, and it's not a protocol // we support, it will be rejected. 
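// Editorial sketch (illustrative only, not part of this patch): the intent of the revised check added below.
// Instead of comparing the dataset's protocol against the single configured :Protocol setting, the importer
// now asks the PID provider registry whether this installation can manage the supplied global id. The calls
// mirror the surrounding code; wrapping them in a standalone helper like this is hypothetical.
static void rejectUnmanageableGlobalId(Dataset ds) throws ImportException {
    if (ds.getGlobalId().asString() != null
            && !PidUtil.getPidProvider(ds.getGlobalId().getProviderId()).canManagePID()) {
        throw new ImportException("Could not register id " + ds.getGlobalId().asString() + ", protocol not supported");
    }
}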
+ if (importType.equals(ImportType.NEW)) { - if (ds.getGlobalId().asString() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) { + if (ds.getGlobalId().asString() != null && !PidUtil.getPidProvider(ds.getGlobalId().getProviderId()).canManagePID()) { throw new ImportException("Could not register id " + ds.getGlobalId().asString() + ", protocol not supported"); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationProvidersRegistrationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationProvidersRegistrationServiceBean.java index a93d01527a0..fbad14645bc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationProvidersRegistrationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationProvidersRegistrationServiceBean.java @@ -17,6 +17,7 @@ import edu.harvard.iq.dataverse.authorization.providers.oauth2.OAuth2AuthenticationProviderFactory; import edu.harvard.iq.dataverse.authorization.providers.oauth2.oidc.OIDCAuthenticationProviderFactory; import edu.harvard.iq.dataverse.authorization.providers.shib.ShibAuthenticationProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; import java.util.HashMap; import java.util.Map; @@ -121,6 +122,15 @@ public void startup() { logger.log(Level.SEVERE, "Exception setting up the authentication provider '" + row.getId() + "': " + ex.getMessage(), ex); } }); + + // Add providers registered via MPCONFIG + if (JvmSettings.OIDC_ENABLED.lookupOptional(Boolean.class).orElse(false)) { + try { + registerProvider(OIDCAuthenticationProviderFactory.buildFromSettings()); + } catch (AuthorizationSetupException e) { + logger.log(Level.SEVERE, "Exception setting up an OIDC auth provider via MicroProfile Config", e); + } + } } private void registerProviderFactory(AuthenticationProviderFactory aFactory) diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java index 106a83a4ad1..1c0f5010059 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java @@ -21,10 +21,14 @@ import edu.harvard.iq.dataverse.authorization.providers.shib.ShibAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; +import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailData; import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailServiceBean; import edu.harvard.iq.dataverse.passwordreset.PasswordResetData; import edu.harvard.iq.dataverse.passwordreset.PasswordResetServiceBean; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; @@ -118,6 +122,9 @@ public class AuthenticationServiceBean { @EJB SavedSearchServiceBean savedSearchService; + @EJB + PrivateUrlServiceBean privateUrlService; + 
@PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -580,7 +587,7 @@ public boolean updateProvider( AuthenticatedUser authenticatedUser, String authe * {@code userDisplayInfo}, a lookup entry for them based * UserIdentifier.getLookupStringPerAuthProvider (within the supplied * authentication provider), and internal user identifier (used for role - * assignments, etc.) based on UserIdentifier.getInternalUserIdentifer. + * assignments, etc.) based on UserIdentifier.getInternalUserIdentifier. * * @param userRecordId * @param proposedAuthenticatedUserIdentifier @@ -605,20 +612,21 @@ public AuthenticatedUser createAuthenticatedUser(UserRecordIdentifier userRecord proposedAuthenticatedUserIdentifier = proposedAuthenticatedUserIdentifier.trim(); } // we now select a username for the generated AuthenticatedUser, or give up - String internalUserIdentifer = proposedAuthenticatedUserIdentifier; + String internalUserIdentifier = proposedAuthenticatedUserIdentifier; // TODO should lock table authenticated users for write here - if ( identifierExists(internalUserIdentifer) ) { + if ( identifierExists(internalUserIdentifier) ) { if ( ! generateUniqueIdentifier ) { return null; } int i=1; - String identifier = internalUserIdentifer + i; + String identifier = internalUserIdentifier + i; while ( identifierExists(identifier) ) { i += 1; + identifier = internalUserIdentifier + i; } authenticatedUser.setUserIdentifier(identifier); } else { - authenticatedUser.setUserIdentifier(internalUserIdentifer); + authenticatedUser.setUserIdentifier(internalUserIdentifier); } authenticatedUser = save( authenticatedUser ); // TODO should unlock table authenticated users for write here @@ -931,14 +939,45 @@ public List getWorkflowCommentsByAuthenticatedUser(Authenticat return query.getResultList(); } - public ApiToken getValidApiTokenForUser(AuthenticatedUser user) { + /** + * This method gets a valid api token for an AuthenticatedUser, creating a new + * token if one doesn't exist or if the token is expired. + * + * @param user + * @return + */ + public ApiToken getValidApiTokenForAuthenticatedUser(AuthenticatedUser user) { ApiToken apiToken = null; apiToken = findApiTokenByUser(user); - if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + if ((apiToken == null) || apiToken.isExpired()) { logger.fine("Created apiToken for user: " + user.getIdentifier()); apiToken = generateApiTokenForUser(user); } return apiToken; } + /** + * Gets a token for an AuthenticatedUser or a PrivateUrlUser. It will create a + * new token if needed for an AuthenticatedUser. Note that, for a PrivateUrlUser, this method creates a token + * with a temporary AuthenticatedUser that only has a userIdentifier - needed in generating signed URLs. + * @param user + * @return a token or null (i.e.
if the user is not an AuthenticatedUser or PrivateUrlUser) + */ + + public ApiToken getValidApiTokenForUser(User user) { + ApiToken apiToken = null; + if (user instanceof AuthenticatedUser) { + apiToken = getValidApiTokenForAuthenticatedUser((AuthenticatedUser) user); + } else if (user instanceof PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + + PrivateUrl privateUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privateUrl.getToken()); + AuthenticatedUser au = new AuthenticatedUser(); + au.setUserIdentifier(privateUrlUser.getIdentifier()); + apiToken.setAuthenticatedUser(au); + } + return apiToken; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/UserIdentifier.java b/src/main/java/edu/harvard/iq/dataverse/authorization/UserIdentifier.java index 1ac2c7583d6..312910e52c7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/UserIdentifier.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/UserIdentifier.java @@ -25,18 +25,31 @@ public class UserIdentifier { /** * The String used in the permission system to assign roles, for example. */ - String internalUserIdentifer; + String internalUserIdentifier; - public UserIdentifier(String lookupStringPerAuthProvider, String internalUserIdentifer) { + public UserIdentifier(String lookupStringPerAuthProvider, String internalUserIdentifier) { this.lookupStringPerAuthProvider = lookupStringPerAuthProvider; - this.internalUserIdentifer = internalUserIdentifer; + this.internalUserIdentifier = internalUserIdentifier; } public String getLookupStringPerAuthProvider() { return lookupStringPerAuthProvider; } + /** + * @deprecated because of a typo; use {@link #getInternalUserIdentifier()} instead + * @see #getInternalUserIdentifier() + * @return the internal user identifier + */ + @Deprecated public String getInternalUserIdentifer() { - return internalUserIdentifer; + return getInternalUserIdentifier(); + } + + /** + * @return the internal user identifier + */ + public String getInternalUserIdentifier() { + return internalUserIdentifier; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/UserRecordIdentifier.java b/src/main/java/edu/harvard/iq/dataverse/authorization/UserRecordIdentifier.java index 963ee592bbf..dfbb43fae46 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/UserRecordIdentifier.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/UserRecordIdentifier.java @@ -2,6 +2,8 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import java.util.Objects; + /** * Identifies a user using two strings: *
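A minimal usage sketch for the new getValidApiTokenForUser(User) overload above, for example when assembling signed URLs for external tools; the helper method below is hypothetical, only getValidApiTokenForUser and the ApiToken accessors come from the codebase:

// Works for both AuthenticatedUser and PrivateUrlUser; any other User type yields null.
static String tokenStringFor(User user, AuthenticationServiceBean authSvc) {
    ApiToken token = authSvc.getValidApiTokenForUser(user);
    if (token == null) {
        return null; // e.g. a GuestUser
    }
    // For a PrivateUrlUser this is the private URL token wrapped in a transient ApiToken.
    return token.getTokenString();
}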
          @@ -38,4 +40,16 @@ public AuthenticatedUserLookup createAuthenticatedUserLookup( AuthenticatedUser return new AuthenticatedUserLookup(userIdInRepo, repoId, u); } + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof UserRecordIdentifier)) return false; + UserRecordIdentifier that = (UserRecordIdentifier) o; + return Objects.equals(repoId, that.repoId) && Objects.equals(getUserIdInRepo(), that.getUserIdInRepo()); + } + + @Override + public int hashCode() { + return Objects.hash(repoId, getUserIdInRepo()); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java index dc4644dfccd..a0e3f899443 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java @@ -488,6 +488,7 @@ public void displayNotification() { break; case REQUESTFILEACCESS: + case REQUESTEDFILEACCESS: DataFile file = fileService.find(userNotification.getObjectId()); if (file != null) { userNotification.setTheObject(file.getOwner()); diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java index 48efe1e2592..7fd7bf3e885 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java @@ -139,6 +139,7 @@ public OAuth20Service getService(String callbackUrl) { * Receive user data from OAuth2 provider after authn/z has been successfull. (Callback view uses this) * Request a token and access the resource, parse output and return user details. * @param code The authz code sent from the provider + * @param state The state which was communicated between us and the provider, identifying the exact request * @param redirectUrl The redirect URL (some providers require this when fetching the access token, e. g. 
Google) * @return A user record containing all user details accessible for us * @throws IOException Thrown when communication with the provider fails @@ -146,7 +147,7 @@ public OAuth20Service getService(String callbackUrl) { * @throws InterruptedException Thrown when the requests thread is failing * @throws ExecutionException Thrown when the requests thread is failing */ - public OAuth2UserRecord getUserRecord(String code, String redirectUrl) + public OAuth2UserRecord getUserRecord(String code, String state, String redirectUrl) throws IOException, OAuth2Exception, InterruptedException, ExecutionException { OAuth20Service service = getService(redirectUrl); diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java index 99df2375a79..0fd0852b4df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java @@ -100,7 +100,7 @@ public void exchangeCodeForToken() throws IOException { if (oIdp.isPresent() && code.isPresent()) { AbstractOAuth2AuthenticationProvider idp = oIdp.get(); - oauthUser = idp.getUserRecord(code.get(), systemConfig.getOAuth2CallbackUrl()); + oauthUser = idp.getUserRecord(code.get(), req.getParameter("state"), systemConfig.getOAuth2CallbackUrl()); // Throw an error if this authentication method is disabled: // (it's not clear if it's possible at all, for somebody to get here with diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthProvider.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthProvider.java index 6a031262561..5eb2b391eb7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthProvider.java @@ -1,5 +1,7 @@ package edu.harvard.iq.dataverse.authorization.providers.oauth2.oidc; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; import com.github.scribejava.core.builder.api.DefaultApi20; import com.nimbusds.oauth2.sdk.AuthorizationCode; import com.nimbusds.oauth2.sdk.AuthorizationCodeGrant; @@ -18,6 +20,8 @@ import com.nimbusds.oauth2.sdk.id.ClientID; import com.nimbusds.oauth2.sdk.id.Issuer; import com.nimbusds.oauth2.sdk.id.State; +import com.nimbusds.oauth2.sdk.pkce.CodeChallengeMethod; +import com.nimbusds.oauth2.sdk.pkce.CodeVerifier; import com.nimbusds.oauth2.sdk.token.BearerAccessToken; import com.nimbusds.openid.connect.sdk.AuthenticationRequest; import com.nimbusds.openid.connect.sdk.Nonce; @@ -34,14 +38,20 @@ import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.OAuth2Exception; import edu.harvard.iq.dataverse.authorization.providers.oauth2.OAuth2UserRecord; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.IOException; import java.net.URI; +import java.time.Duration; +import java.time.temporal.ChronoUnit; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; +import 
java.util.logging.Level; import java.util.logging.Logger; /** @@ -55,15 +65,33 @@ public class OIDCAuthProvider extends AbstractOAuth2AuthenticationProvider { protected String title = "Open ID Connect"; protected List scope = Arrays.asList("openid", "email", "profile"); - Issuer issuer; - ClientAuthentication clientAuth; - OIDCProviderMetadata idpMetadata; + final Issuer issuer; + final ClientAuthentication clientAuth; + final OIDCProviderMetadata idpMetadata; + final boolean pkceEnabled; + final CodeChallengeMethod pkceMethod; - public OIDCAuthProvider(String aClientId, String aClientSecret, String issuerEndpointURL) throws AuthorizationSetupException { + /** + * Using PKCE, we create and send a special {@link CodeVerifier}. This contains a secret + * we need again when verifying the response by the provider, thus the cache. + * To be absolutely sure this may not be abused to DDoS us and not let unused verifiers rot, + * use an evicting cache implementation and not a standard map. + */ + private final Cache verifierCache = Caffeine.newBuilder() + .maximumSize(JvmSettings.OIDC_PKCE_CACHE_MAXSIZE.lookup(Integer.class)) + .expireAfterWrite(Duration.of(JvmSettings.OIDC_PKCE_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.SECONDS)) + .build(); + + public OIDCAuthProvider(String aClientId, String aClientSecret, String issuerEndpointURL, + boolean pkceEnabled, String pkceMethod) throws AuthorizationSetupException { this.clientSecret = aClientSecret; // nedded for state creation this.clientAuth = new ClientSecretBasic(new ClientID(aClientId), new Secret(aClientSecret)); this.issuer = new Issuer(issuerEndpointURL); - getMetadata(); + + this.idpMetadata = getMetadata(); + + this.pkceEnabled = pkceEnabled; + this.pkceMethod = CodeChallengeMethod.parse(pkceMethod); } /** @@ -75,7 +103,9 @@ public OIDCAuthProvider(String aClientId, String aClientSecret, String issuerEnd * @return false */ @Override - public boolean isDisplayIdentifier() { return false; } + public boolean isDisplayIdentifier() { + return false; + } /** * Setup metadata from OIDC provider during creation of the provider representation @@ -83,9 +113,14 @@ public OIDCAuthProvider(String aClientId, String aClientSecret, String issuerEnd * @throws IOException when sth. goes wrong with the retrieval * @throws ParseException when the metadata is not parsable */ - void getMetadata() throws AuthorizationSetupException { + OIDCProviderMetadata getMetadata() throws AuthorizationSetupException { try { - this.idpMetadata = getMetadata(this.issuer); + var metadata = getMetadata(this.issuer); + // Assert that the provider supports the code flow + if (metadata.getResponseTypes().stream().noneMatch(ResponseType::impliesCodeFlow)) { + throw new AuthorizationSetupException("OIDC provider at "+this.issuer.getValue()+" does not support code flow, disabling."); + } + return metadata; } catch (IOException ex) { logger.severe("OIDC provider metadata at \"+issuerEndpointURL+\" not retrievable: "+ex.getMessage()); throw new AuthorizationSetupException("OIDC provider metadata at "+this.issuer.getValue()+" not retrievable."); @@ -93,11 +128,6 @@ void getMetadata() throws AuthorizationSetupException { logger.severe("OIDC provider metadata at \"+issuerEndpointURL+\" not parsable: "+ex.getMessage()); throw new AuthorizationSetupException("OIDC provider metadata at "+this.issuer.getValue()+" not parsable."); } - - // Assert that the provider supports the code flow - if (! 
this.idpMetadata.getResponseTypes().stream().filter(idp -> idp.impliesCodeFlow()).findAny().isPresent()) { - throw new AuthorizationSetupException("OIDC provider at "+this.issuer.getValue()+" does not support code flow, disabling."); - } } /** @@ -146,6 +176,7 @@ public String buildAuthzUrl(String state, String callbackUrl) { State stateObject = new State(state); URI callback = URI.create(callbackUrl); Nonce nonce = new Nonce(); + CodeVerifier pkceVerifier = pkceEnabled ? new CodeVerifier() : null; AuthenticationRequest req = new AuthenticationRequest.Builder(new ResponseType("code"), Scope.parse(this.scope), @@ -153,9 +184,17 @@ public String buildAuthzUrl(String state, String callbackUrl) { callback) .endpointURI(idpMetadata.getAuthorizationEndpointURI()) .state(stateObject) + // Called method is nullsafe - will disable sending a PKCE challenge in case the verifier is not present + .codeChallenge(pkceVerifier, pkceMethod) .nonce(nonce) .build(); + // Cache the PKCE verifier, as we need the secret in it for verification later again, after the client sends us + // the auth code! We use the state to cache the verifier, as the state is unique per authentication event. + if (pkceVerifier != null) { + this.verifierCache.put(state, pkceVerifier); + } + return req.toURI().toString(); } @@ -171,10 +210,14 @@ public String buildAuthzUrl(String state, String callbackUrl) { * @throws ExecutionException Thrown when the requests thread is failing */ @Override - public OAuth2UserRecord getUserRecord(String code, String redirectUrl) - throws IOException, OAuth2Exception, InterruptedException, ExecutionException { - // Create grant object - AuthorizationGrant codeGrant = new AuthorizationCodeGrant(new AuthorizationCode(code), URI.create(redirectUrl)); + public OAuth2UserRecord getUserRecord(String code, String state, String redirectUrl) throws IOException, OAuth2Exception { + // Retrieve the verifier from the cache and clear from the cache. If not found, will be null. + // Will be sent to token endpoint for verification, so if required but missing, will lead to exception. + CodeVerifier verifier = verifierCache.getIfPresent(state); + + // Create grant object - again, this is null-safe for the verifier + AuthorizationGrant codeGrant = new AuthorizationCodeGrant( + new AuthorizationCode(code), URI.create(redirectUrl), verifier); // Get Access Token first Optional accessToken = getAccessToken(codeGrant); @@ -275,16 +318,42 @@ Optional getUserInfo(BearerAccessToken accessToken) throws IOException } /** - * Returns the UserRecordIdentifier corresponding to the given accessToken if valid. - * UserRecordIdentifier (same used as in OAuth2UserRecord), i.e. can be used to find a local UserAccount. - * @param accessToken - * @return Returns the UserRecordIdentifier corresponding to the given accessToken if valid. - * @throws IOException - * @throws OAuth2Exception + * Trades an access token for an {@link UserRecordIdentifier} (if valid). + * + * @apiNote The resulting {@link UserRecordIdentifier} may be used with + * {@link edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean#lookupUser(UserRecordIdentifier)} + * to look up an {@link edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser} from the database. + * @see edu.harvard.iq.dataverse.api.auth.BearerTokenAuthMechanism + * + * @param accessToken The token to use when requesting user information from the provider + * @return Returns an {@link UserRecordIdentifier} for a valid access token or an empty {@link Optional}. 
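To make the PKCE handling above easier to follow, here is a condensed, self-contained sketch of the round trip using the same Nimbus SDK types; the sketch class and its method names are illustrative and not part of this change:

import com.nimbusds.oauth2.sdk.AuthorizationCode;
import com.nimbusds.oauth2.sdk.AuthorizationCodeGrant;
import com.nimbusds.oauth2.sdk.AuthorizationGrant;
import com.nimbusds.oauth2.sdk.pkce.CodeChallenge;
import com.nimbusds.oauth2.sdk.pkce.CodeChallengeMethod;
import com.nimbusds.oauth2.sdk.pkce.CodeVerifier;
import java.net.URI;

class PkceRoundTripSketch {

    // Step 1, authorization request: create a verifier (random secret kept server-side),
    // derive the challenge to send to the provider, and cache the verifier under the
    // per-login "state" value (the provider above uses an evicting Caffeine cache for this).
    static CodeChallenge challengeFor(CodeVerifier verifier) {
        return CodeChallenge.compute(CodeChallengeMethod.S256, verifier);
    }

    // Step 2, callback: look the verifier up by "state" again and attach it to the code
    // grant so the token endpoint can check it against the challenge sent in step 1.
    static AuthorizationGrant grantFor(String code, String redirectUrl, CodeVerifier cachedVerifier) {
        return new AuthorizationCodeGrant(new AuthorizationCode(code), URI.create(redirectUrl), cachedVerifier);
    }
}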
+ * @throws IOException In case communication with the endpoint fails to succeed for an I/O reason */ - public Optional getUserIdentifierForValidToken(BearerAccessToken accessToken) throws IOException, OAuth2Exception{ - // Request the UserInfoEndpoint to obtain UserInfo, since this endpoint also validate the Token we can reuse the existing code path. - // As an alternative we could use the Introspect Endpoint or assume the Token as some encoded information (i.e. JWT). - return Optional.of(new UserRecordIdentifier( this.getId(), getUserInfo(accessToken).get().getSubject().getValue())); + public Optional getUserIdentifier(BearerAccessToken accessToken) throws IOException { + OAuth2UserRecord userRecord; + try { + // Try to retrieve with given token (throws if invalid token) + Optional userInfo = getUserInfo(accessToken); + + if (userInfo.isPresent()) { + // Take this detour to avoid code duplication and potentially hard to track conversion errors. + userRecord = getUserRecord(userInfo.get()); + } else { + // This should not happen - an error at the provider side will lead to an exception. + logger.log(Level.WARNING, + "User info retrieval from {0} returned empty optional but expected exception for token {1}.", + List.of(getId(), accessToken).toArray() + ); + return Optional.empty(); + } + } catch (OAuth2Exception e) { + logger.log(Level.FINE, + "Could not retrieve user info with token {0} at provider {1}: {2}", + List.of(accessToken, getId(), e.getMessage()).toArray()); + logger.log(Level.FINER, "Retrieval failed, details as follows: ", e); + return Optional.empty(); + } + + return Optional.of(userRecord.getUserRecordIdentifier()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactory.java index c6d1a28e19d..3f8c18d0567 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactory.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthenticationProviderFactory.java @@ -5,6 +5,7 @@ import edu.harvard.iq.dataverse.authorization.providers.AuthenticationProviderFactory; import edu.harvard.iq.dataverse.authorization.providers.AuthenticationProviderRow; import edu.harvard.iq.dataverse.authorization.providers.oauth2.OAuth2AuthenticationProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; import java.util.Map; @@ -37,11 +38,39 @@ public String getInfo() { public AuthenticationProvider buildProvider( AuthenticationProviderRow aRow ) throws AuthorizationSetupException { Map factoryData = OAuth2AuthenticationProviderFactory.parseFactoryData(aRow.getFactoryData()); - OIDCAuthProvider oidc = new OIDCAuthProvider(factoryData.get("clientId"), factoryData.get("clientSecret"), factoryData.get("issuer")); + OIDCAuthProvider oidc = new OIDCAuthProvider( + factoryData.get("clientId"), + factoryData.get("clientSecret"), + factoryData.get("issuer"), + Boolean.parseBoolean(factoryData.getOrDefault("pkceEnabled", "false")), + factoryData.getOrDefault("pkceMethod", "S256") + ); + oidc.setId(aRow.getId()); oidc.setTitle(aRow.getTitle()); oidc.setSubTitle(aRow.getSubtitle()); return oidc; } + + /** + * Build an OIDC provider from MicroProfile Config provisioned details + * @return The configured auth provider + * @throws AuthorizationSetupException + */ + public static AuthenticationProvider buildFromSettings() 
throws AuthorizationSetupException { + OIDCAuthProvider oidc = new OIDCAuthProvider( + JvmSettings.OIDC_CLIENT_ID.lookup(), + JvmSettings.OIDC_CLIENT_SECRET.lookup(), + JvmSettings.OIDC_AUTH_SERVER_URL.lookup(), + JvmSettings.OIDC_PKCE_ENABLED.lookupOptional(Boolean.class).orElse(false), + JvmSettings.OIDC_PKCE_METHOD.lookupOptional().orElse("S256") + ); + + oidc.setId("oidc-mpconfig"); + oidc.setTitle(JvmSettings.OIDC_TITLE.lookupOptional().orElse("OpenID Connect")); + oidc.setSubTitle(JvmSettings.OIDC_SUBTITLE.lookupOptional().orElse("OpenID Connect")); + + return oidc; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibUtil.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibUtil.java index fff135e0dec..4cf41903405 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/shib/ShibUtil.java @@ -133,7 +133,24 @@ public static String findSingleValue(String mayHaveMultipleValues) { return singleValue; } + /** + * @deprecated because of a typo; use {@link #generateFriendlyLookingUserIdentifier(String, String)} instead + * @see #generateFriendlyLookingUserIdentifier(String, String) + * @param usernameAssertion + * @param email + * @return a friendly-looking user identifier based on the asserted username or email, or a UUID as fallback + */ + @Deprecated public static String generateFriendlyLookingUserIdentifer(String usernameAssertion, String email) { + return generateFriendlyLookingUserIdentifier(usernameAssertion, email); + } + + /** + * @param usernameAssertion + * @param email + * @return a friendly-looking user identifier based on the asserted username or email, or a UUID as fallback + */ + public static String generateFriendlyLookingUserIdentifier(String usernameAssertion, String email) { if (usernameAssertion != null && !usernameAssertion.isEmpty()) { return usernameAssertion; } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java index 89429b912f6..b307c655798 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java @@ -1,7 +1,9 @@ package edu.harvard.iq.dataverse.authorization.users; import edu.harvard.iq.dataverse.Cart; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetLock; +import edu.harvard.iq.dataverse.FileAccessRequest; import edu.harvard.iq.dataverse.UserNotification.Type; import edu.harvard.iq.dataverse.UserNotification; import edu.harvard.iq.dataverse.validation.ValidateEmail; @@ -17,6 +19,7 @@ import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import java.io.Serializable; import java.sql.Timestamp; +import java.util.ArrayList; import java.util.Date; import java.util.HashSet; import java.util.List; @@ -28,6 +31,7 @@ import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.FetchType; import jakarta.persistence.GeneratedValue; import jakarta.persistence.GenerationType; import jakarta.persistence.Id; @@ -38,8 +42,8 @@ import jakarta.persistence.PostLoad; import jakarta.persistence.PrePersist; import jakarta.persistence.Transient; +import jakarta.validation.constraints.NotBlank; import jakarta.validation.constraints.NotNull; -import 
org.hibernate.validator.constraints.NotBlank; /** * When adding an attribute to this class, be sure to update the following: @@ -64,7 +68,8 @@ @NamedQuery( name="AuthenticatedUser.filter", query="select au from AuthenticatedUser au WHERE (" + "LOWER(au.userIdentifier) like LOWER(:query) OR " - + "lower(concat(au.firstName,' ',au.lastName)) like lower(:query))"), + + "lower(concat(au.firstName,' ',au.lastName)) like lower(:query) or " + + "lower(au.email) like lower(:query))"), @NamedQuery( name="AuthenticatedUser.findAdminUser", query="select au from AuthenticatedUser au WHERE " + "au.superuser = true " @@ -202,6 +207,29 @@ public void setDatasetLocks(List datasetLocks) { @OneToMany(mappedBy = "user", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) private List oAuth2TokenDatas; + /*for many to many fileAccessRequests*/ + @OneToMany(mappedBy = "user", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST, CascadeType.REFRESH}, fetch = FetchType.LAZY) + private List fileAccessRequests; + + public List getFileAccessRequests() { + return fileAccessRequests; + } + + public void setFileAccessRequests(List fARs) { + this.fileAccessRequests = fARs; + } + + public List getRequestedDataFiles(){ + List requestedDataFiles = new ArrayList<>(); + + for(FileAccessRequest far : getFileAccessRequests()){ + if(far.isStateCreated()) { + requestedDataFiles.add(far.getDataFile()); + } + } + return requestedDataFiles; + } + @Override public AuthenticatedUserDisplayInfo getDisplayInfo() { return new AuthenticatedUserDisplayInfo(firstName, lastName, email, affiliation, position); diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java index f64b5c301e7..03f018221fd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java @@ -12,7 +12,7 @@ */ public class PrivateUrlUser implements User { - public static final String PREFIX = "#"; + public static final String PREFIX = "!"; /** * In the future, this could probably be dvObjectId rather than datasetId, diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index 593a5cbfdc3..a2f76150d7b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -450,7 +450,7 @@ private void loadChecksumManifest() { // We probably want package files to be able to use specific stores instead. // More importantly perhaps, the approach above does not take into account // if the dataset may have an AlternativePersistentIdentifier, that may be - // designated isStorageLocationDesignator() - i.e., if a different identifer + // designated isStorageLocationDesignator() - i.e., if a different identifier // needs to be used to name the storage directory, instead of the main/current // persistent identifier above. 
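As a side note on the new AuthenticatedUser.getRequestedDataFiles() helper above, a short hypothetical usage sketch (the wrapper method and logger wiring are illustrative; only the helper itself and DataFile come from the codebase):

static void logPendingRequests(AuthenticatedUser au, Logger logger) {
    // Only FileAccessRequests still in the CREATED state are returned by the helper,
    // so requests that were already granted or rejected no longer show up here.
    for (DataFile pending : au.getRequestedDataFiles()) {
        logger.fine("Pending file access request for DataFile id " + pending.getId());
    }
}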
getJobLogger().log(Level.INFO, "Reading checksum manifest: " + manifestAbsolutePath); diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index fb702c21df2..9ce30683a87 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -109,7 +109,7 @@ public void open(Serializable checkpoint) throws Exception { // We probably want package files to be able to use specific stores instead. // More importantly perhaps, the approach above does not take into account // if the dataset may have an AlternativePersistentIdentifier, that may be - // designated isStorageLocationDesignator() - i.e., if a different identifer + // designated isStorageLocationDesignator() - i.e., if a different identifier // needs to be used to name the storage directory, instead of the main/current // persistent identifier above. getJobLogger().log(Level.INFO, "Reading dataset directory: " + directory.getAbsolutePath() diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java index ba34a3d1ed1..af1e9c6a294 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java @@ -33,6 +33,7 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -58,7 +59,6 @@ import java.util.logging.Level; import java.util.logging.Logger; import jakarta.servlet.http.HttpServletRequest; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; @Named @Dependent @@ -360,31 +360,22 @@ private DataFile createPackageDataFile(List files) { if (commandEngine.getContext().systemConfig().isFilePIDsEnabledForCollection(dataset.getOwner())) { - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(packageFile.getProtocol(), commandEngine.getContext()); + PidProvider pidProvider = commandEngine.getContext().dvObjects().getEffectivePidGenerator(dataset); if (packageFile.getIdentifier() == null || packageFile.getIdentifier().isEmpty()) { - packageFile.setIdentifier(idServiceBean.generateDataFileIdentifier(packageFile)); - } - String nonNullDefaultIfKeyNotFound = ""; - String protocol = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - String authority = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - if (packageFile.getProtocol() == null) { - packageFile.setProtocol(protocol); - } - if (packageFile.getAuthority() == null) { - packageFile.setAuthority(authority); + pidProvider.generatePid(packageFile); } if (!packageFile.isIdentifierRegistered()) { String doiRetString = ""; - idServiceBean = GlobalIdServiceBean.getBean(commandEngine.getContext()); + try { - doiRetString = idServiceBean.createIdentifier(packageFile); + 
doiRetString = pidProvider.createIdentifier(packageFile); } catch (Throwable e) { } // Check return value to make sure registration succeeded - if (!idServiceBean.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) { + if (!pidProvider.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) { packageFile.setIdentifierRegistered(true); packageFile.setGlobalIdCreateTime(new Date()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java new file mode 100644 index 00000000000..10ff68a56f3 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -0,0 +1,344 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.channels.Channel; +import java.nio.file.Path; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.List; +import java.util.function.Predicate; +import java.util.logging.Logger; + +import javax.net.ssl.SSLContext; + +import org.apache.http.client.config.CookieSpecs; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.ssl.SSLContextBuilder; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; + + +/** + * A base class for StorageIO implementations supporting remote access. At present, that includes the RemoteOverlayAccessIO store and the newer GlobusOverlayAccessIO store. It primarily includes + * common methods for handling auxiliary files in the configured base store. 
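The FileRecordWriter hunk above replaces the per-protocol GlobalIdServiceBean lookup with the dataset's effective PID generator. Read as a simplified sketch (variable names as in the diff, the try/catch around createIdentifier omitted), the intended flow is:

// 1. Ask for the PID provider that is effective for this dataset.
PidProvider pidProvider = commandEngine.getContext().dvObjects().getEffectivePidGenerator(dataset);
// 2. If the package file has no identifier yet, let the provider mint one; the old code that
//    copied protocol/authority from settings is gone, so the provider is expected to set those.
if (packageFile.getIdentifier() == null || packageFile.getIdentifier().isEmpty()) {
    pidProvider.generatePid(packageFile);
}
// 3. Unless the provider defers registration until publication, register now and treat a
//    response echoing the identifier as success.
String doiRetString = pidProvider.createIdentifier(packageFile);
if (!pidProvider.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) {
    packageFile.setIdentifierRegistered(true);
    packageFile.setGlobalIdCreateTime(new Date());
}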
+ * @param + */ +public abstract class AbstractRemoteOverlayAccessIO extends StorageIO { + + protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + public static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; + static final String BASE_STORE = "base-store"; + protected static final String SECRET_KEY = "secret-key"; + static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; + protected static final String REMOTE_STORE_NAME = "remote-store-name"; + protected static final String REMOTE_STORE_URL = "remote-store-url"; + + // Whether Dataverse can access the file bytes + // Currently False only for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits + static final String FILES_NOT_ACCESSIBLE_BY_DATAVERSE = "files-not-accessible-by-dataverse"; + + protected StorageIO baseStore = null; + protected String path = null; + protected PoolingHttpClientConnectionManager cm = null; + CloseableHttpClient httpclient = null; + protected static HttpClientContext localContext = HttpClientContext.create(); + + protected int timeout = 1200; + protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); + protected static boolean trustCerts = false; + protected int httpConcurrency = 4; + + public static String getBaseStoreIdFor(String driverId) { + return getConfigParamForDriver(driverId, BASE_STORE); + } + + public AbstractRemoteOverlayAccessIO() { + super(); + } + + public AbstractRemoteOverlayAccessIO(String storageLocation, String driverId) { + super(storageLocation, driverId); + } + + public AbstractRemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) { + super(dvObject, req, driverId); + } + + @Override + public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { + return baseStore.openAuxChannel(auxItemTag, options); + } + + @Override + public boolean isAuxObjectCached(String auxItemTag) throws IOException { + return baseStore.isAuxObjectCached(auxItemTag); + } + + @Override + public long getAuxObjectSize(String auxItemTag) throws IOException { + return baseStore.getAuxObjectSize(auxItemTag); + } + + @Override + public Path getAuxObjectAsPath(String auxItemTag) throws IOException { + return baseStore.getAuxObjectAsPath(auxItemTag); + } + + @Override + public void backupAsAux(String auxItemTag) throws IOException { + baseStore.backupAsAux(auxItemTag); + } + + @Override + public void revertBackupAsAux(String auxItemTag) throws IOException { + baseStore.revertBackupAsAux(auxItemTag); + } + + @Override + public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + baseStore.savePathAsAux(fileSystemPath, auxItemTag); + } + + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); + } + + /** + * @param inputStream InputStream we want to save + * @param auxItemTag String representing this Auxiliary type ("extension") + * @throws IOException if anything goes wrong. 
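The driver options declared at the top of this new class (base-store, url-expiration-minutes, remote-store-name, remote-store-url, files-not-accessible-by-dataverse) are read per store id. Elsewhere in this diff the lookup pattern is dataverse.files.<driverId>.<param> (see getDriverType and getFilesRootDirectory below), so a remote overlay store backed by a file store might be wired up roughly as follows; the store ids and values are purely illustrative:

dataverse.files.rem1.type=remote
dataverse.files.rem1.base-store=file1
dataverse.files.rem1.remote-store-name=Example Remote Store
dataverse.files.rem1.remote-store-url=https://data.example.org
dataverse.files.rem1.url-expiration-minutes=60
dataverse.files.file1.type=file
dataverse.files.file1.directory=/srv/dataverse/files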
+ */ + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag); + } + + @Override + public List listAuxObjects() throws IOException { + return baseStore.listAuxObjects(); + } + + @Override + public void deleteAuxObject(String auxItemTag) throws IOException { + baseStore.deleteAuxObject(auxItemTag); + } + + @Override + public void deleteAllAuxObjects() throws IOException { + baseStore.deleteAllAuxObjects(); + } + + @Override + public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { + return baseStore.getAuxFileAsInputStream(auxItemTag); + } + + protected int getUrlExpirationMinutes() { + String optionValue = getConfigParam(URL_EXPIRATION_MINUTES); + if (optionValue != null) { + Integer num; + try { + num = Integer.parseInt(optionValue); + } catch (NumberFormatException ex) { + num = null; + } + if (num != null) { + return num; + } + } + return 60; + } + + public CloseableHttpClient getSharedHttpClient() { + if (httpclient == null) { + try { + initHttpPool(); + httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); + + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + logger.warning(ex.getMessage()); + } + } + return httpclient; + } + + private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { + if (trustCerts) { + // use the TrustAllStrategy to accept any certificate, including self-signed ones + SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; + + sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); + // create an SSL Socket Factory that uses this SSLContext together with a + // hostname verifier that accepts all hosts. + connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + + Registry registry = RegistryBuilder.create() + .register("https", connectionFactory).build(); + cm = new PoolingHttpClientConnectionManager(registry); + } else { + cm = new PoolingHttpClientConnectionManager(); + } + cm.setDefaultMaxPerRoute(httpConcurrency); + cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); + } + + @Override + abstract public long retrieveSizeFromMedia(); + + @Override + public boolean exists() { + logger.fine("Exists called"); + return (retrieveSizeFromMedia() != -1); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); + } + + @Override + public String getStorageLocation() throws IOException { + String fullStorageLocation = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStorageLocation); + int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); + if (driverIndex >= 0) { + fullStorageLocation = fullStorageLocation + .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + if (this.getDvObject() instanceof Dataset) { + throw new IOException("AbstractRemoteOverlayAccessIO: Datasets are not a supported dvObject"); + } else if (this.getDvObject() instanceof DataFile) { + fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; + } else if (dvObject instanceof Dataverse) { + throw new IOException("AbstractRemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStorageLocation: " + fullStorageLocation); + return fullStorageLocation; + } + protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + + if (baseStore == null) { + String baseDriverId = getBaseStoreIdFor(driverId); + String fullStorageLocation = null; + String baseDriverType = getConfigParamForDriver(baseDriverId, StorageIO.TYPE, + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if (dvObject instanceof Dataset) { + baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + } else { + if (this.getDvObject() != null) { + fullStorageLocation = getStoragePath(); + + // S3 expects :/// + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") + + "/" + fullStorageLocation; + break; + default: + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); + throw new IOException("Not supported"); + } + + } else if (storageLocation != null) { + // ://// + // remoteDriverId:// is removed if coming through directStorageIO + int index = storageLocation.indexOf(DataAccess.SEPARATOR); + if (index > 0) { + storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); + } + // The base store needs the baseStoreIdentifier and not the relative URL (if it exists) + int endOfId = storageLocation.indexOf("//"); + fullStorageLocation = (endOfId>-1) ? 
storageLocation.substring(0, endOfId) : storageLocation; + + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") + + "/" + fullStorageLocation; + break; + default: + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); + throw new IOException("Not supported"); + } + } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + } + if (baseDriverType.contentEquals(DataAccess.S3)) { + ((S3AccessIO) baseStore).setMainDriver(false); + } + } + remoteStoreName = getConfigParam(REMOTE_STORE_NAME); + try { + remoteStoreUrl = new URL(getConfigParam(REMOTE_STORE_URL)); + } catch (MalformedURLException mfue) { + logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); + } + } + + protected String getStoragePath() throws IOException { + String fullStoragePath = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStoragePath); + int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); + if (driverIndex >= 0) { + fullStoragePath = fullStoragePath + .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStoragePath.indexOf("//"); + if (suffixIndex >= 0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); + } + if (getDvObject() instanceof Dataset) { + fullStoragePath = getDataset().getAuthorityForFileStorage() + "/" + + getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (getDvObject() instanceof DataFile) { + fullStoragePath = getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStoragePath: " + fullStoragePath); + return fullStoragePath; + } + + public static boolean isNotDataverseAccessible(String storeId) { + return Boolean.parseBoolean(StorageIO.getConfigParamForDriver(storeId, FILES_NOT_ACCESSIBLE_BY_DATAVERSE)); + } + + + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index d046fa4661d..a1bcbe49327 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -48,6 +48,7 @@ public DataAccess() { public static final String S3 = "s3"; static final String SWIFT = "swift"; static final String REMOTE = "remote"; + public static final String GLOBUS = "globus"; static final String TMP = "tmp"; public static final String SEPARATOR = "://"; //Default to "file" is for tests only @@ -98,6 +99,8 @@ protected static StorageIO getStorageIO(T dvObject, Data return new SwiftAccessIO<>(dvObject, req, storageDriverId); case REMOTE: return new RemoteOverlayAccessIO<>(dvObject, req, storageDriverId); + case GLOBUS: + return new GlobusOverlayAccessIO<>(dvObject, req, storageDriverId); case TMP: throw new IOException( "DataAccess IO attempted on a temporary file that 
hasn't been permanently saved yet."); @@ -129,6 +132,8 @@ public static StorageIO getDirectStorageIO(String fullStorageLocation) return new SwiftAccessIO<>(storageLocation, storageDriverId); case REMOTE: return new RemoteOverlayAccessIO<>(storageLocation, storageDriverId); + case GLOBUS: + return new GlobusOverlayAccessIO<>(storageLocation, storageDriverId); default: logger.warning("Could not find storage driver for: " + fullStorageLocation); throw new IOException("getDirectStorageIO: Unsupported storage method."); @@ -148,19 +153,41 @@ public static String[] getDriverIdAndStorageLocation(String storageLocation) { } public static String getStorageIdFromLocation(String location) { - if(location.contains(SEPARATOR)) { - //It's a full location with a driverId, so strip and reapply the driver id - //NOte that this will strip the bucketname out (which s3 uses) but the S3IOStorage class knows to look at re-insert it - return location.substring(0,location.indexOf(SEPARATOR) +3) + location.substring(location.lastIndexOf('/')+1); - } - return location.substring(location.lastIndexOf('/')+1); + if (location.contains(SEPARATOR)) { + // It's a full location with a driverId, so strip and reapply the driver id + // NOte that this will strip the bucketname out (which s3 uses) but the + // S3IOStorage class knows to look at re-insert it + return location.substring(0, location.indexOf(SEPARATOR) + 3) + + location.substring(location.lastIndexOf('/') + 1); + } + return location.substring(location.lastIndexOf('/') + 1); + } + + /** Changes storageidentifiers of the form + * s3://bucketname/18b39722140-50eb7d3c5ece or file://18b39722140-50eb7d3c5ece to s3://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece + * and + * 18b39722140-50eb7d3c5ece to 10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece + * @param id + * @param dataset + * @return + */ + public static String getLocationFromStorageId(String id, Dataset dataset) { + String path= dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + if (id.contains(SEPARATOR)) { + // It's a full location with a driverId, so strip and reapply the driver id + // NOte that this will strip the bucketname out (which s3 uses) but the + // S3IOStorage class knows to look at re-insert it + return id.substring(0, id.indexOf(SEPARATOR) + 3) + path + + id.substring(id.lastIndexOf('/') + 1); + } + return path + id.substring(id.lastIndexOf('/') + 1); } public static String getDriverType(String driverId) { if(driverId.isEmpty() || driverId.equals("tmp")) { return "tmp"; } - return System.getProperty("dataverse.files." + driverId + ".type", "Undefined"); + return StorageIO.getConfigParamForDriver(driverId, StorageIO.TYPE, "Undefined"); } //This @@ -168,7 +195,7 @@ public static String getDriverPrefix(String driverId) throws IOException { if(driverId.isEmpty() || driverId.equals("tmp")) { return "tmp" + SEPARATOR; } - String storageType = System.getProperty("dataverse.files." 
+ driverId + ".type", "Undefined"); + String storageType = StorageIO.getConfigParamForDriver(driverId, StorageIO.TYPE, "Undefined"); switch(storageType) { case FILE: return FileAccessIO.getDriverPrefix(driverId); @@ -236,7 +263,8 @@ public static StorageIO createNewStorageIO(T dvObject, S storageIO = new S3AccessIO<>(dvObject, null, storageDriverId); break; case REMOTE: - storageIO = createNewStorageIO(dvObject, storageTag, RemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; + case GLOBUS: + storageIO = createNewStorageIO(dvObject, storageTag, AbstractRemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; break; default: logger.warning("Could not find storage driver for: " + storageTag); @@ -369,9 +397,35 @@ public static boolean isValidDirectStorageIdentifier(String storageId) { return S3AccessIO.isValidIdentifier(driverId, storageId); case REMOTE: return RemoteOverlayAccessIO.isValidIdentifier(driverId, storageId); + case GLOBUS: + return GlobusOverlayAccessIO.isValidIdentifier(driverId, storageId); default: logger.warning("Request to validate for storage driver: " + driverId); } return false; } + + + + public static String getNewStorageIdentifier(String driverId) { + String storageType = DataAccess.getDriverType(driverId); + if (storageType.equals("tmp") || storageType.equals("Undefined")) { + return null; + } + switch (storageType) { + case FILE: + return FileAccessIO.getNewIdentifier(driverId); + case SWIFT: + return SwiftAccessIO.getNewIdentifier(driverId); + case S3: + return S3AccessIO.getNewIdentifier(driverId); + case REMOTE: + return RemoteOverlayAccessIO.getNewIdentifier(driverId); + case GLOBUS: + return GlobusOverlayAccessIO.getNewIdentifier(driverId); + default: + logger.warning("Request to validate for storage driver: " + driverId); + } + return null; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index d95df1567bd..26637ec5742 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -35,8 +35,6 @@ import java.util.List; import java.util.function.Predicate; import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; // Dataverse imports: @@ -55,6 +53,7 @@ public class FileAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.FileAccessIO"); + public static final String DIRECTORY = "directory"; public FileAccessIO() { @@ -115,13 +114,14 @@ public void open (DataAccessOption... 
options) throws IOException { this.setInputStream(fin); setChannel(fin.getChannel()); - this.setSize(getLocalFileSize()); + this.setSize(retrieveSizeFromMedia()); if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); @@ -506,21 +506,6 @@ public void delete() throws IOException { // Auxilary helper methods, filesystem access-specific: - private long getLocalFileSize () { - long fileSize = -1; - - try { - File testFile = getFileSystemPath().toFile(); - if (testFile != null) { - fileSize = testFile.length(); - } - return fileSize; - } catch (IOException ex) { - return -1; - } - - } - public FileInputStream openLocalFileAsInputStream () { FileInputStream in; @@ -595,7 +580,7 @@ private String getDatasetDirectory() throws IOException { protected String getFilesRootDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files." + this.driverId + ".directory", "/tmp/files"); + String filesRootDirectory = getConfigParam(DIRECTORY, "/tmp/files"); return filesRootDirectory; } @@ -742,4 +727,18 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE return toDelete; } + @Override + public long retrieveSizeFromMedia() { + long fileSize = -1; + try { + File testFile = getFileSystemPath().toFile(); + if (testFile != null) { + fileSize = testFile.length(); + } + return fileSize; + } catch (IOException ex) { + return -1; + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java new file mode 100644 index 00000000000..8bed60d8302 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -0,0 +1,78 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; + +public interface GlobusAccessibleStore { + + //Whether Dataverse manages access controls for the Globus endpoint or not. + static final String MANAGED = "managed"; + /* + * transfer and reference endpoint formats: + * + * REFERENCE_ENDPOINTS_WITH_BASEPATHS - reference endpoints separated by a comma + */ + static final String TRANSFER_ENDPOINT_WITH_BASEPATH = "transfer-endpoint-with-basepath"; + static final String GLOBUS_TOKEN = "globus-token"; + + public static boolean isDataverseManaged(String driverId) { + return Boolean.parseBoolean(StorageIO.getConfigParamForDriver(driverId, MANAGED)); + } + + public static String getTransferEnpointWithPath(String driverId) { + return StorageIO.getConfigParamForDriver(driverId, GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH); + } + + public static String getTransferEndpointId(String driverId) { + String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH); + int pathStart = endpointWithBasePath.indexOf("/"); + return pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; + } + + public static String getTransferPath(String driverId) { + String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH); + int pathStart = endpointWithBasePath.indexOf("/"); + return pathStart > 0 ? endpointWithBasePath.substring(pathStart) : ""; + + } + + public static JsonArray getReferenceEndpointsWithPaths(String driverId) { + String[] endpoints = StorageIO.getConfigParamForDriver(driverId, AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); + JsonArrayBuilder builder = Json.createArrayBuilder(); + for(int i=0;i://// + * + * Storage location: + * / + * + * Internal StorageIdentifier format: + * :// + * + * Storage location: + * /// + * + */ +public class GlobusOverlayAccessIO extends AbstractRemoteOverlayAccessIO implements GlobusAccessibleStore { + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); + + /* + * If this is set to true, the store supports Globus transfer in and + * Dataverse/the globus app manage file locations, access controls, deletion, + * etc. + */ + private Boolean dataverseManaged = null; + + private String relativeDirectoryPath; + + private String endpointPath; + + private String filename; + + private String[] allowedEndpoints; + private String endpoint; + + public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { + super(dvObject, req, driverId); + configureGlobusEndpoints(); + configureStores(req, driverId, null); + logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); + + logger.fine("Relative path: " + path); + } + + + public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { + this.driverId = driverId; + configureGlobusEndpoints(); + configureStores(null, driverId, storageLocation); + if (isManaged()) { + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); + path = parts[1]; + } else { + this.setIsLocalFile(false); + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Referenced path: " + path); + } + } + + private boolean isManaged() { + if(dataverseManaged==null) { + dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); + } + return dataverseManaged; + } + + private String retrieveGlobusAccessToken() { + String globusToken = getConfigParam(GlobusAccessibleStore.GLOBUS_TOKEN); + + + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + return accessToken.getOtherTokens().get(0).getAccessToken(); + } + + private void parsePath() { + int filenameStart = path.lastIndexOf("/") + 1; + String endpointWithBasePath = null; + if (!isManaged()) { + endpointWithBasePath = findMatchingEndpoint(path, allowedEndpoints); + } else { + endpointWithBasePath = allowedEndpoints[0]; + } + //String endpointWithBasePath = baseEndpointPath.substring(baseEndpointPath.lastIndexOf(DataAccess.SEPARATOR) + 3); + int pathStart = endpointWithBasePath.indexOf("/"); + logger.fine("endpointWithBasePath: " + endpointWithBasePath); + endpointPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart + 1) : ""); + logger.fine("endpointPath: " + endpointPath); + + + if (isManaged() && (dvObject!=null)) { + + Dataset ds = null; + if (dvObject instanceof Dataset) { + ds = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + ds = ((DataFile) dvObject).getOwner(); + } + relativeDirectoryPath = "/" + ds.getAuthority() + "/" + ds.getIdentifier(); + } else { + relativeDirectoryPath = ""; + } + if (filenameStart > 0) { + relativeDirectoryPath = relativeDirectoryPath + path.substring(0, filenameStart); + } + logger.fine("relativeDirectoryPath finally: " + relativeDirectoryPath); + filename = path.substring(filenameStart); + endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; + + + } + + private static String findMatchingEndpoint(String path, String[] allowedEndpoints) { + for(int i=0;i= 0) { + this.setSize(dataFile.getFilesize()); + } else { + logger.fine("Setting size"); + this.setSize(retrieveSizeFromMedia()); + } + // Only applies for the S3 Connector case (where we could have run an ingest) + if (dataFile.getContentType() != null + && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() + && dataFile.getDataTable() != null + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { + + List datavariables = dataFile.getDataTable().getDataVariables(); + String varHeaderLine = generateVariableHeader(datavariables); + this.setVarHeader(varHeaderLine); + } + + } + + this.setMimeType(dataFile.getContentType()); + + try { + this.setFileName(dataFile.getFileMetadata().getLabel()); + } catch (Exception ex) { + this.setFileName("unknown"); + } + } else if (dvObject instanceof Dataset) { + throw new IOException( + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); + } else if (dvObject instanceof Dataverse) { + throw new IOException( + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); + } + } + + @Override + public Path getFileSystemPath() throws IOException { + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); + } + + @Override + public void savePath(Path fileSystemPath) throws IOException { + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); + } + + @Override + public void saveInputStream(InputStream inputStream) throws IOException { + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); + } + + @Override + public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 2b4aed3a9a5..1be2bb79e0f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -48,6 +48,7 @@ import java.nio.channels.WritableByteChannel; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.logging.Level; import java.util.logging.Logger; import 
org.apache.commons.io.IOUtils; //import org.primefaces.util.Base64; @@ -110,19 +111,30 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } if (isThumbnailCached(storageIO, size)) { + logger.fine("Found cached thumbnail for " + file.getId()); return true; } + return generateThumbnail(file, storageIO, size); - logger.fine("Checking for thumbnail, file type: " + file.getContentType()); + } - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - return generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - return generatePDFThumbnail(storageIO, size); + private static boolean generateThumbnail(DataFile file, StorageIO storageIO, int size) { + logger.log(Level.FINE, (file.isPreviewImageFail() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); + // Don't try to generate if there have been failures: + if (!file.isPreviewImageFail()) { + boolean thumbnailGenerated = false; + if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { + thumbnailGenerated = generateImageThumbnail(storageIO, size); + } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { + thumbnailGenerated = generatePDFThumbnail(storageIO, size); + } + if (!thumbnailGenerated) { + logger.fine("No thumbnail generated for " + file.getId()); + } + return thumbnailGenerated; } return false; - } // Note that this method works on ALL file types for which thumbnail @@ -184,6 +196,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s // We rely on ImageMagick to convert PDFs; so if it's not installed, // better give up right away: if (!isImageMagickInstalled()) { + logger.fine("Couldn't find ImageMagick"); return false; } @@ -195,6 +208,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s // will run the ImageMagick on it, and will save its output in another temp // file, and will save it as an "auxiliary" file via the driver. boolean tempFilesRequired = false; + File tempFile = null; try { Path pdfFilePath = storageIO.getFileSystemPath(); @@ -206,35 +220,33 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s tempFilesRequired = true; } catch (IOException ioex) { + logger.warning(ioex.getMessage()); // this on the other hand is likely a fatal condition :( return false; } if (tempFilesRequired) { - ReadableByteChannel pdfFileChannel; - + InputStream inputStream = null; try { storageIO.open(); - //inputStream = storageIO.getInputStream(); - pdfFileChannel = storageIO.getReadChannel(); + inputStream = storageIO.getInputStream(); } catch (Exception ioex) { logger.warning("caught Exception trying to open an input stream for " + storageIO.getDataFile().getStorageIdentifier()); return false; } - File tempFile; - FileChannel tempFileChannel = null; + OutputStream outputStream = null; try { tempFile = File.createTempFile("tempFileToRescale", ".tmp"); - tempFileChannel = new FileOutputStream(tempFile).getChannel(); - - tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); + outputStream = new FileOutputStream(tempFile); + //Reads/transfers all bytes from the input stream to the output stream. 
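For context: the copy in this hunk relies on InputStream.transferTo (available since Java 9) instead of the earlier FileChannel-based transfer, so the whole body is streamed and both ends are closed in one place. A minimal standalone sketch of the same pattern, using hypothetical local files rather than the storageIO plumbing in this patch:

import java.io.*;

class StreamCopySketch {
    // Copies all bytes from source into tempFile; try-with-resources closes both streams on success or failure.
    static long copy(File source, File tempFile) throws IOException {
        try (InputStream in = new FileInputStream(source);
             OutputStream out = new FileOutputStream(tempFile)) {
            return in.transferTo(out); // returns the number of bytes transferred
        }
    }
}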
+ inputStream.transferTo(outputStream); } catch (IOException ioex) { logger.warning("GenerateImageThumb: failed to save pdf bytes in a temporary file."); return false; } finally { - IOUtils.closeQuietly(tempFileChannel); - IOUtils.closeQuietly(pdfFileChannel); + IOUtils.closeQuietly(inputStream); + IOUtils.closeQuietly(outputStream); } sourcePdfFile = tempFile; } @@ -258,6 +270,12 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s logger.warning("failed to save generated pdf thumbnail, as AUX file " + THUMBNAIL_SUFFIX + size + "!"); return false; } + finally { + try { + tempFile.delete(); + } + catch (Exception e) {} + } } return true; @@ -359,6 +377,14 @@ private static boolean generateImageThumbnailFromInputStream(StorageIO logger.warning("Failed to rescale and/or save the image: " + ioex.getMessage()); return false; } + finally { + if(tempFileRequired) { + try { + tempFile.delete(); + } + catch (Exception e) {} + } + } return true; @@ -436,16 +462,8 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { if (cachedThumbnailChannel == null) { logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size); - // try to generate, if not available: - boolean generated = false; - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - generated = generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - generated = generatePDFThumbnail(storageIO, size); - } - - if (generated) { - // try to open again: + // try to generate, if not available and hasn't failed before + if(generateThumbnail(file, storageIO, size)) { try { cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); } catch (Exception ioEx) { @@ -757,7 +775,7 @@ public static String generatePDFThumbnailFromFile(String fileLocation, int size) try { fileSize = new File(fileLocation).length(); } catch (Exception ex) { - // + logger.warning("Can't open file: " + fileLocation); } if (fileSize == 0 || fileSize > sizeLimit) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index be6f9df0254..de392b74cca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -165,4 +165,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE throw new UnsupportedDataAccessOperationException("InputStreamIO: tthis method is not supported in this DataAccess driver."); } + @Override + public long retrieveSizeFromMedia() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 66c6a4cc2ee..bca70259cb7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -11,105 +11,77 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; -import java.net.URL; import java.nio.channels.Channel; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; 
import java.nio.channels.WritableByteChannel; import java.nio.file.Path; -import java.security.KeyManagementException; -import java.security.KeyStoreException; -import java.security.NoSuchAlgorithmException; import java.util.List; -import java.util.function.Predicate; -import java.util.logging.Logger; import org.apache.http.Header; -import org.apache.http.client.config.CookieSpecs; -import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpHead; -import org.apache.http.client.protocol.HttpClientContext; -import org.apache.http.config.Registry; -import org.apache.http.config.RegistryBuilder; -import org.apache.http.conn.socket.ConnectionSocketFactory; -import org.apache.http.conn.ssl.NoopHostnameVerifier; -import org.apache.http.conn.ssl.SSLConnectionSocketFactory; -import org.apache.http.conn.ssl.TrustAllStrategy; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.protocol.HTTP; -import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; -import javax.net.ssl.SSLContext; - /** * @author qqmyers - * @param what it stores */ /* * Remote Overlay Driver * * StorageIdentifier format: - * ://// + * ://// + * + * baseUrl: http(s):// */ -public class RemoteOverlayAccessIO extends StorageIO { - - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); - - private StorageIO baseStore = null; - private String urlPath = null; - private String baseUrl = null; +public class RemoteOverlayAccessIO extends AbstractRemoteOverlayAccessIO { - private static HttpClientContext localContext = HttpClientContext.create(); - private PoolingHttpClientConnectionManager cm = null; - CloseableHttpClient httpclient = null; - private int timeout = 1200; - private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) - .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - private static boolean trustCerts = false; - private int httpConcurrency = 4; + // A single baseUrl of the form http(s):// where this store can reference data + static final String BASE_URL = "base-url"; + String baseUrl = null; + public RemoteOverlayAccessIO() { + super(); + } + public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); + configureRemoteEndpoints(); configureStores(req, driverId, null); logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); - urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); - validatePath(urlPath); - - logger.fine("Base URL: " + urlPath); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); + + logger.fine("Relative path: " + path); } public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { super(null, null, driverId); this.setIsLocalFile(false); + configureRemoteEndpoints(); configureStores(null, driverId, storageLocation); - urlPath = 
storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - validatePath(urlPath); - logger.fine("Base URL: " + urlPath); + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Relative path: " + path); } - - private void validatePath(String path) throws IOException { + + protected void validatePath(String relPath) throws IOException { try { - URI absoluteURI = new URI(baseUrl + "/" + urlPath); - if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + URI absoluteURI = new URI(baseUrl + "/" + relPath); + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); } - } - + } @Override public void open(DataAccessOption... options) throws IOException { @@ -150,10 +122,14 @@ public void open(DataAccessOption... options) throws IOException { this.setSize(dataFile.getFilesize()); } else { logger.fine("Setting size"); - this.setSize(getSizeFromHttpHeader()); + this.setSize(retrieveSizeFromMedia()); } - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + if (dataFile.getContentType() != null + && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() + && dataFile.getDataTable() != null + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); @@ -171,18 +147,17 @@ public void open(DataAccessOption... 
options) throws IOException { } } else if (dvObject instanceof Dataset) { throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); } else if (dvObject instanceof Dataverse) { throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); - } else { - this.setSize(getSizeFromHttpHeader()); + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); } } - private long getSizeFromHttpHeader() { + @Override + public long retrieveSizeFromMedia() { long size = -1; - HttpHead head = new HttpHead(baseUrl + "/" + urlPath); + HttpHead head = new HttpHead(baseUrl + "/" + path); try { CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); @@ -224,12 +199,12 @@ public InputStream getInputStream() throws IOException { break; default: logger.warning("Response from " + get.getURI().toString() + " was " + code); - throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath + " code: " + code); + throw new IOException("Cannot retrieve: " + baseUrl + "/" + path + " code: " + code); } } catch (Exception e) { logger.warning(e.getMessage()); e.printStackTrace(); - throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath + " " + e.getMessage()); + throw new IOException("Error retrieving: " + baseUrl + "/" + path + " " + e.getMessage()); } setChannel(Channels.newChannel(super.getInputStream())); @@ -260,13 +235,13 @@ public void delete() throws IOException { throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); } try { - HttpDelete del = new HttpDelete(baseUrl + "/" + urlPath); + HttpDelete del = new HttpDelete(baseUrl + "/" + path); CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); try { int code = response.getStatusLine().getStatusCode(); switch (code) { case 200: - logger.fine("Sent DELETE for " + baseUrl + "/" + urlPath); + logger.fine("Sent DELETE for " + baseUrl + "/" + path); default: logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); } @@ -275,7 +250,7 @@ public void delete() throws IOException { } } catch (Exception e) { logger.warning(e.getMessage()); - throw new IOException("Error deleting: " + baseUrl + "/" + urlPath); + throw new IOException("Error deleting: " + baseUrl + "/" + path); } @@ -284,104 +259,12 @@ public void delete() throws IOException { } - @Override - public Channel openAuxChannel(String auxItemTag, DataAccessOption... 
options) throws IOException { - return baseStore.openAuxChannel(auxItemTag, options); - } - - @Override - public boolean isAuxObjectCached(String auxItemTag) throws IOException { - return baseStore.isAuxObjectCached(auxItemTag); - } - - @Override - public long getAuxObjectSize(String auxItemTag) throws IOException { - return baseStore.getAuxObjectSize(auxItemTag); - } - - @Override - public Path getAuxObjectAsPath(String auxItemTag) throws IOException { - return baseStore.getAuxObjectAsPath(auxItemTag); - } - - @Override - public void backupAsAux(String auxItemTag) throws IOException { - baseStore.backupAsAux(auxItemTag); - } - - @Override - public void revertBackupAsAux(String auxItemTag) throws IOException { - baseStore.revertBackupAsAux(auxItemTag); - } - - @Override - // this method copies a local filesystem Path into this DataAccess Auxiliary - // location: - public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { - baseStore.savePathAsAux(fileSystemPath, auxItemTag); - } - - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); - } - - /** - * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") - * @throws IOException if anything goes wrong. - */ - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag); - } - - @Override - public List listAuxObjects() throws IOException { - return baseStore.listAuxObjects(); - } - - @Override - public void deleteAuxObject(String auxItemTag) throws IOException { - baseStore.deleteAuxObject(auxItemTag); - } - - @Override - public void deleteAllAuxObjects() throws IOException { - baseStore.deleteAllAuxObjects(); - } - - @Override - public String getStorageLocation() throws IOException { - String fullStorageLocation = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStorageLocation); - int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - if (this.getDvObject() instanceof Dataset) { - throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); - } else if (this.getDvObject() instanceof DataFile) { - fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStorageLocation: " + fullStorageLocation); - return fullStorageLocation; - } - @Override public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); } - @Override - public boolean exists() { - logger.fine("Exists called"); - return (getSizeFromHttpHeader() != -1); - } - @Override public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( @@ -394,20 +277,15 @@ public OutputStream getOutputStream() throws UnsupportedDataAccessOperationExcep 
"RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); } - @Override - public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { - return baseStore.getAuxFileAsInputStream(auxItemTag); - } - @Override public boolean downloadRedirectEnabled() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + String optionValue = getConfigParam(StorageIO.DOWNLOAD_REDIRECT); if ("true".equalsIgnoreCase(optionValue)) { return true; } return false; } - + public boolean downloadRedirectEnabled(String auxObjectTag) { return baseStore.downloadRedirectEnabled(auxObjectTag); } @@ -418,11 +296,11 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary // ToDo - support remote auxiliary Files if (auxiliaryTag == null) { - String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secret-key"); + String secretKey = getConfigParam(SECRET_KEY); if (secretKey == null) { - return baseUrl + "/" + urlPath; + return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { @@ -430,27 +308,21 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary } } - int getUrlExpirationMinutes() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); - if (optionValue != null) { - Integer num; - try { - num = Integer.parseInt(optionValue); - } catch (NumberFormatException ex) { - num = null; - } - if (num != null) { - return num; - } - } - return 60; - } - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); + /** This endpoint configures all the endpoints the store is allowed to reference data from. At present, the RemoteOverlayAccessIO only supports a single endpoint but + * the derived GlobusOverlayAccessIO can support multiple endpoints. + * @throws IOException + */ + protected void configureRemoteEndpoints() throws IOException { + baseUrl = getConfigParam(BASE_URL); if (baseUrl == null) { - throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); - } else { + //Will accept the first endpoint using the newer setting + baseUrl = getConfigParam(REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*")[0]; + if (baseUrl == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); + } + } + if (baseUrl != null) { try { new URI(baseUrl); } catch (Exception e) { @@ -460,180 +332,42 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } } - - if (baseStore == null) { - String baseDriverId = getBaseStoreIdFor(driverId); - String fullStorageLocation = null; - String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if(dvObject instanceof Dataset) { - baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); - } else { - if (this.getDvObject() != null) { - fullStorageLocation = getStoragePath(); - - // S3 expects :/// - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." 
+ baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; - break; - default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); - } - - } else if (storageLocation != null) { - // ://// - //remoteDriverId:// is removed if coming through directStorageIO - int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if(index > 0) { - storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); - } - //THe base store needs the baseStoreIdentifier and not the relative URL - fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); - - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; - break; - default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); - } - } - baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); - } - if (baseDriverType.contentEquals(DataAccess.S3)) { - ((S3AccessIO) baseStore).setMainDriver(false); - } - } - remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); - try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); - } catch(MalformedURLException mfue) { - logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); - } - } - - //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. 
- private String getStoragePath() throws IOException { - String fullStoragePath = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStoragePath); - int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - int suffixIndex = fullStoragePath.indexOf("//"); - if(suffixIndex >=0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); - } - if (this.getDvObject() instanceof Dataset) { - fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" - + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (this.getDvObject() instanceof DataFile) { - fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - }else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStoragePath: " + fullStoragePath); - return fullStoragePath; - } - - public CloseableHttpClient getSharedHttpClient() { - if (httpclient == null) { - try { - initHttpPool(); - httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); - - } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { - logger.warning(ex.getMessage()); - } - } - return httpclient; - } - - private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { - if (trustCerts) { - // use the TrustSelfSignedStrategy to allow Self Signed Certificates - SSLContext sslContext; - SSLConnectionSocketFactory connectionFactory; - - sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); - // create an SSL Socket Factory to use the SSLContext with the trust self signed - // certificate strategy - // and allow all hosts verifier. - connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); - - Registry registry = RegistryBuilder.create() - .register("https", connectionFactory).build(); - cm = new PoolingHttpClientConnectionManager(registry); - } else { - cm = new PoolingHttpClientConnectionManager(); - } - cm.setDefaultMaxPerRoute(httpConcurrency); - cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); } @Override public void savePath(Path fileSystemPath) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + this.getClass().getName() + ": saveInputStream() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + this.getClass().getName() + ": saveInputStream(InputStream, Long) not implemented in this storage driver."); } - protected static boolean isValidIdentifier(String driverId, String storageId) { + static boolean isValidIdentifier(String driverId, String storageId) { String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); - String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); + String baseUrl = getConfigParamForDriver(driverId, BASE_URL); try { URI absoluteURI = new URI(baseUrl + "/" + urlPath); - if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); return false; } return true; } - - public static String getBaseStoreIdFor(String driverId) { - return System.getProperty("dataverse.files." 
+ driverId + ".base-store"); - } - - @Override - public List cleanUp(Predicate filter, boolean dryRun) throws IOException { - return baseStore.cleanUp(filter, dryRun); - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 822ada0b83e..c2143bd4789 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -4,6 +4,7 @@ import com.amazonaws.ClientConfiguration; import com.amazonaws.HttpMethod; import com.amazonaws.SdkClientException; +import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.AWSCredentialsProviderChain; import com.amazonaws.auth.AWSStaticCredentialsProvider; import com.amazonaws.auth.BasicAWSCredentials; @@ -57,9 +58,11 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; +import java.util.Optional; import java.util.Random; import java.util.function.Predicate; import java.util.logging.Logger; @@ -88,6 +91,16 @@ public class S3AccessIO extends StorageIO { private static final Config config = ConfigProvider.getConfig(); private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.S3AccessIO"); + static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; + static final String CUSTOM_ENDPOINT_URL = "custom-endpoint-url"; + static final String PROXY_URL = "proxy-url"; + static final String BUCKET_NAME = "bucket-name"; + static final String MIN_PART_SIZE = "min-part-size"; + static final String CUSTOM_ENDPOINT_REGION = "custom-endpoint-region"; + static final String PATH_STYLE_ACCESS = "path-style-access"; + static final String PAYLOAD_SIGNING = "payload-signing"; + static final String CHUNKED_ENCODING = "chunked-encoding"; + static final String PROFILE = "profile"; private boolean mainDriver = true; @@ -103,19 +116,18 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { minPartSize = getMinPartSize(driverId); s3=getClient(driverId); tm=getTransferManager(driverId); - endpoint = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url", ""); - proxy = System.getProperty("dataverse.files." + driverId + ".proxy-url", ""); + endpoint = getConfigParam(CUSTOM_ENDPOINT_URL, ""); + proxy = getConfigParam(PROXY_URL, ""); if(!StringUtil.isEmpty(proxy)&&StringUtil.isEmpty(endpoint)) { logger.severe(driverId + " config error: Must specify a custom-endpoint-url if proxy-url is specified"); } - //Not sure this is needed but moving it from the open method for now since it definitely doesn't need to run every time an object is opened. - try { - if (bucketName == null || !s3.doesBucketExistV2(bucketName)) { - throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets."); - } - } catch (SdkClientException sce) { - throw new IOException("ERROR: S3AccessIO - Failed to look up bucket "+bucketName+" (is AWS properly configured?): " + sce.getMessage()); - } + + // FWIW: There used to be a check here to see if the bucket exists. + // It was very redundant (checking every time we access any file) and didn't do + // much but potentially make the failure (in the unlikely case a bucket doesn't + // exist/just disappeared) happen slightly earlier (here versus at the first + // file/metadata access). 
+ } catch (Exception e) { throw new AmazonClientException( "Cannot instantiate a S3 client; check your AWS credentials and region", @@ -191,7 +203,7 @@ public void open(DataAccessOption... options) throws IOException { } } // else we're OK (assumes bucket name in storageidentifier matches the driver's bucketname) } else { - if(!storageIdentifier.substring((this.driverId + DataAccess.SEPARATOR).length()).contains(":")) { + if(!storageIdentifier.contains(":")) { //No driver id or bucket newStorageIdentifier= this.driverId + DataAccess.SEPARATOR + bucketName + ":" + storageIdentifier; } else { @@ -207,20 +219,14 @@ public void open(DataAccessOption... options) throws IOException { if (isReadAccess) { - key = getMainFileKey(); - ObjectMetadata objectMetadata = null; - try { - objectMetadata = s3.getObjectMetadata(bucketName, key); - } catch (SdkClientException sce) { - throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")"); - } - this.setSize(objectMetadata.getContentLength()); + this.setSize(retrieveSizeFromMedia()); if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); @@ -849,7 +855,7 @@ private static String getMainFileKey(String baseKey, String storageIdentifier, S @Override public boolean downloadRedirectEnabled() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + String optionValue = getConfigParam(DOWNLOAD_REDIRECT); if ("true".equalsIgnoreCase(optionValue)) { return true; } @@ -1073,7 +1079,7 @@ public JsonObjectBuilder generateTemporaryS3UploadUrls(String globalId, String s } int getUrlExpirationMinutes() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); + String optionValue = getConfigParam(URL_EXPIRATION_MINUTES); if (optionValue != null) { Integer num; try { @@ -1089,7 +1095,7 @@ int getUrlExpirationMinutes() { } private static String getBucketName(String driverId) { - return System.getProperty("dataverse.files." + driverId + ".bucket-name"); + return getConfigParamForDriver(driverId, BUCKET_NAME); } private static long getMinPartSize(String driverId) { @@ -1097,7 +1103,7 @@ private static long getMinPartSize(String driverId) { // (minimum allowed is 5*1024**2 but it probably isn't worth the complexity starting at ~5MB. Also - confirmed that they use base 2 definitions) long min = 5 * 1024 * 1024l; - String partLength = System.getProperty("dataverse.files." + driverId + ".min-part-size"); + String partLength = getConfigParamForDriver(driverId, MIN_PART_SIZE); try { if (partLength != null) { long val = Long.parseLong(partLength); @@ -1146,12 +1152,12 @@ private static AmazonS3 getClient(String driverId) { * Pass in a URL pointing to your S3 compatible storage. * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html */ - String s3CEUrl = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url", ""); + String s3CEUrl = getConfigParamForDriver(driverId, CUSTOM_ENDPOINT_URL, ""); /** * Pass in a region to use for SigV4 signing of requests. 
* Defaults to "dataverse" as it is not relevant for custom S3 implementations. */ - String s3CERegion = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-region", "dataverse"); + String s3CERegion = getConfigParamForDriver(driverId, CUSTOM_ENDPOINT_REGION, "dataverse"); // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. if (!s3CEUrl.isEmpty()) { @@ -1161,7 +1167,7 @@ private static AmazonS3 getClient(String driverId) { * Pass in a boolean value if path style access should be used within the S3 client. * Anything but case-insensitive "true" will lead to value of false, which is default value, too. */ - Boolean s3pathStyleAccess = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".path-style-access", "false")); + Boolean s3pathStyleAccess = Boolean.parseBoolean(getConfigParamForDriver(driverId, PATH_STYLE_ACCESS, "false")); // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); @@ -1169,41 +1175,70 @@ private static AmazonS3 getClient(String driverId) { * Pass in a boolean value if payload signing should be used within the S3 client. * Anything but case-insensitive "true" will lead to value of false, which is default value, too. */ - Boolean s3payloadSigning = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".payload-signing","false")); + Boolean s3payloadSigning = Boolean.parseBoolean(getConfigParamForDriver(driverId, PAYLOAD_SIGNING,"false")); /** * Pass in a boolean value if chunked encoding should not be used within the S3 client. * Anything but case-insensitive "false" will lead to value of true, which is default value, too. */ - Boolean s3chunkedEncoding = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".chunked-encoding","true")); + Boolean s3chunkedEncoding = Boolean.parseBoolean(getConfigParamForDriver(driverId, CHUNKED_ENCODING,"true")); // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false s3CB.setPayloadSigningEnabled(s3payloadSigning); // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); - /** - * Pass in a string value if this storage driver should use a non-default AWS S3 profile. - * The default is "default" which should work when only one profile exists. + /** Configure credentials for the S3 client. There are multiple mechanisms available. + * Role-based/instance credentials are globally defined while the other mechanisms (profile, static) + * are defined per store. The logic below assures that + * * if a store specific profile or static credentials are explicitly set, they will be used in preference to the global role-based credentials. + * * if a store specific role-based credentials are explicitly set, they will be used in preference to the global instance credentials, + * * if a profile and static credentials are both explicitly set, the profile will be used preferentially, and + * * if no store-specific credentials are set, the global credentials will be preferred over using any "default" profile credentials that are found. */ - String s3profile = System.getProperty("dataverse.files." 
+ driverId + ".profile","default"); - ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile); - - // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env - // vars or system properties to provide these, but use the secrets config source provided by Payara. - AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( - new BasicAWSCredentials( - config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class).orElse(""), - config.getOptionalValue("dataverse.files." + driverId + ".secret-key", String.class).orElse("") - )); - - //Add role-based provider as in the default provider chain - InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance(); + + ArrayList providers = new ArrayList<>(); + + String s3profile = getConfigParamForDriver(driverId, PROFILE); + boolean allowInstanceCredentials = true; + // Assume that instance credentials should not be used if the profile is + // actually set for this store or if static creds are provided (below). + if (s3profile != null) { + allowInstanceCredentials = false; + } + // Try to retrieve credentials via Microprofile Config API, too. For production + // use, you should not use env vars or system properties to provide these, but + // use the secrets config source provided by Payara. + Optional accessKey = config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class); + Optional secretKey = config.getOptionalValue("dataverse.files." + driverId + ".secret-key", String.class); + if (accessKey.isPresent() && secretKey.isPresent()) { + allowInstanceCredentials = false; + AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( + new BasicAWSCredentials( + accessKey.get(), + secretKey.get())); + providers.add(staticCredentials); + } else if (s3profile == null) { + //Only use the default profile when it isn't explicitly set for this store when there are no static creds (otherwise it will be preferred). 
+ s3profile = "default"; + } + if (s3profile != null) { + providers.add(new ProfileCredentialsProvider(s3profile)); + } + + if (allowInstanceCredentials) { + // Add role-based provider as in the default provider chain + providers.add(InstanceProfileCredentialsProvider.getInstance()); + } // Add all providers to chain - the first working provider will be used - // (role-based is first in the default cred provider chain, so we're just + // (role-based is first in the default cred provider chain (if no profile or + // static creds are explicitly set for the store), so we're just // reproducing that, then profile, then static credentials as the fallback) - AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(instanceCredentials, profileCredentials, staticCredentials); + + // As the order is the reverse of how we added providers, we reverse the list here + Collections.reverse(providers); + AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(providers); s3CB.setCredentials(providerChain); - + // let's build the client :-) AmazonS3 client = s3CB.build(); driverClientMap.put(driverId, client); @@ -1385,4 +1420,20 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE } return toDelete; } -} \ No newline at end of file + + @Override + public long retrieveSizeFromMedia() throws IOException { + key = getMainFileKey(); + ObjectMetadata objectMetadata = null; + try { + objectMetadata = s3.getObjectMetadata(bucketName, key); + } catch (SdkClientException sce) { + throw new IOException("Cannot get S3 object " + key + " (" + sce.getMessage() + ")"); + } + return objectMetadata.getContentLength(); + } + + public static String getNewIdentifier(String driverId) { + return driverId + DataAccess.SEPARATOR + getConfigParamForDriver(driverId, BUCKET_NAME) + ":" + FileUtil.generateStorageIdentifier(); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index bfd5c5f0d8f..51cdecf64a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -20,12 +20,12 @@ package edu.harvard.iq.dataverse.dataaccess; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.util.FileUtil; import java.io.IOException; import java.io.InputStream; @@ -43,7 +43,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; - /** * * @author Leonid Andreev @@ -52,18 +51,27 @@ public abstract class StorageIO { + static final String INGEST_SIZE_LIMIT = "ingestsizelimit"; + static final String PUBLIC = "public"; + static final String TYPE = "type"; + static final String UPLOAD_REDIRECT = "upload-redirect"; + static final String UPLOAD_OUT_OF_BAND = "upload-out-of-band"; + protected static final String DOWNLOAD_REDIRECT = "download-redirect"; + protected static final String DATAVERSE_INACCESSIBLE = "dataverse-inaccessible"; + + public StorageIO() { } - + public StorageIO(String storageLocation, String driverId) { - this.driverId=driverId; + this.driverId = driverId; } public StorageIO(T dvObject, DataAccessRequest req, String driverId) { this.dvObject = dvObject; this.req = req; - this.driverId=driverId; + this.driverId = driverId; if (this.req == null) { this.req = new 
DataAccessRequest(); } @@ -72,18 +80,19 @@ public StorageIO(T dvObject, DataAccessRequest req, String driverId) { } } - - // Abstract methods to be implemented by the storage drivers: public abstract void open(DataAccessOption... option) throws IOException; protected boolean isReadAccess = false; protected boolean isWriteAccess = false; - //A public store is one in which files may be accessible outside Dataverse and therefore accessible without regard to Dataverse's access controls related to restriction and embargoes. - //Currently, this is just used to warn users at upload time rather than disable restriction/embargo. + // A public store is one in which files may be accessible outside Dataverse and + // therefore accessible without regard to Dataverse's access controls related to + // restriction and embargoes. + // Currently, this is just used to warn users at upload time rather than disable + // restriction/embargo. static protected Map driverPublicAccessMap = new HashMap(); - + public boolean canRead() { return isReadAccess; } @@ -94,115 +103,118 @@ public boolean canWrite() { public abstract String getStorageLocation() throws IOException; - // This method will return a Path, if the storage method is a - // local filesystem. Otherwise should throw an IOException. + // This method will return a Path, if the storage method is a + // local filesystem. Otherwise should throw an IOException. public abstract Path getFileSystemPath() throws IOException; - - public abstract boolean exists() throws IOException; - + + public abstract boolean exists() throws IOException; + public abstract void delete() throws IOException; - + // this method for copies a local Path (for ex., a // temp file, into this DataAccess location): public abstract void savePath(Path fileSystemPath) throws IOException; - + // same, for an InputStream: /** - * This method copies a local InputStream into this DataAccess location. - * Note that the S3 driver implementation of this abstract method is problematic, - * because S3 cannot save an object of an unknown length. This effectively - * nullifies any benefits of streaming; as we cannot start saving until we - * have read the entire stream. - * One way of solving this would be to buffer the entire stream as byte[], - * in memory, then save it... Which of course would be limited by the amount - * of memory available, and thus would not work for streams larger than that. - * So we have eventually decided to save save the stream to a temp file, then - * save to S3. This is slower, but guaranteed to work on any size stream. - * An alternative we may want to consider is to not implement this method - * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException, - * similarly to how we handle attempts to open OutputStreams, in this and the - * Swift driver. - * (Not an issue in either FileAccessIO or SwiftAccessIO implementations) + * This method copies a local InputStream into this DataAccess location. Note + * that the S3 driver implementation of this abstract method is problematic, + * because S3 cannot save an object of an unknown length. This effectively + * nullifies any benefits of streaming; as we cannot start saving until we have + * read the entire stream. One way of solving this would be to buffer the entire + * stream as byte[], in memory, then save it... Which of course would be limited + * by the amount of memory available, and thus would not work for streams larger + * than that. 
So we have eventually decided to save save the stream to a temp + * file, then save to S3. This is slower, but guaranteed to work on any size + * stream. An alternative we may want to consider is to not implement this + * method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. (Not an issue in either + * FileAccessIO or SwiftAccessIO implementations) * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. - */ + */ public abstract void saveInputStream(InputStream inputStream) throws IOException; + public abstract void saveInputStream(InputStream inputStream, Long filesize) throws IOException; - + // Auxiliary File Management: (new as of 4.0.2!) - + // An "auxiliary object" is an abstraction of the traditional DVN/Dataverse - // mechanism of storing extra files related to the man StudyFile/DataFile - - // such as "saved original" and cached format conversions for tabular files, - // thumbnails for images, etc. - in physical files with the same file - // name but various reserved extensions. - - //This function retrieves auxiliary files related to datasets, and returns them as inputstream - public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException ; - + // mechanism of storing extra files related to the man StudyFile/DataFile - + // such as "saved original" and cached format conversions for tabular files, + // thumbnails for images, etc. - in physical files with the same file + // name but various reserved extensions. + + // This function retrieves auxiliary files related to datasets, and returns them + // as inputstream + public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException; + public abstract Channel openAuxChannel(String auxItemTag, DataAccessOption... option) throws IOException; - - public abstract long getAuxObjectSize(String auxItemTag) throws IOException; - - public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException; - - public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException; - - public abstract void backupAsAux(String auxItemTag) throws IOException; - - public abstract void revertBackupAsAux(String auxItemTag) throws IOException; - - // this method copies a local filesystem Path into this DataAccess Auxiliary location: + + public abstract long getAuxObjectSize(String auxItemTag) throws IOException; + + public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException; + + public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException; + + public abstract void backupAsAux(String auxItemTag) throws IOException; + + public abstract void revertBackupAsAux(String auxItemTag) throws IOException; + + // this method copies a local filesystem Path into this DataAccess Auxiliary + // location: public abstract void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException; - + /** - * This method copies a local InputStream into this DataAccess Auxiliary location. - * Note that the S3 driver implementation of this abstract method is problematic, - * because S3 cannot save an object of an unknown length. 
This effectively - * nullifies any benefits of streaming; as we cannot start saving until we - * have read the entire stream. - * One way of solving this would be to buffer the entire stream as byte[], - * in memory, then save it... Which of course would be limited by the amount - * of memory available, and thus would not work for streams larger than that. - * So we have eventually decided to save save the stream to a temp file, then - * save to S3. This is slower, but guaranteed to work on any size stream. - * An alternative we may want to consider is to not implement this method - * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException, - * similarly to how we handle attempts to open OutputStreams, in this and the - * Swift driver. - * (Not an issue in either FileAccessIO or SwiftAccessIO implementations) + * This method copies a local InputStream into this DataAccess Auxiliary + * location. Note that the S3 driver implementation of this abstract method is + * problematic, because S3 cannot save an object of an unknown length. This + * effectively nullifies any benefits of streaming; as we cannot start saving + * until we have read the entire stream. One way of solving this would be to + * buffer the entire stream as byte[], in memory, then save it... Which of + * course would be limited by the amount of memory available, and thus would not + * work for streams larger than that. So we have eventually decided to save save + * the stream to a temp file, then save to S3. This is slower, but guaranteed to + * work on any size stream. An alternative we may want to consider is to not + * implement this method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. (Not an issue in either + * FileAccessIO or SwiftAccessIO implementations) * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. - */ - public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException; - public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException; - - public abstract ListlistAuxObjects() throws IOException; - - public abstract void deleteAuxObject(String auxItemTag) throws IOException; - + */ + public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException; + + public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) + throws IOException; + + public abstract List listAuxObjects() throws IOException; + + public abstract void deleteAuxObject(String auxItemTag) throws IOException; + public abstract void deleteAllAuxObjects() throws IOException; private DataAccessRequest req; private InputStream in = null; - private OutputStream out; + private OutputStream out; protected Channel channel; protected DvObject dvObject; protected String driverId; - /*private int status;*/ + /* private int status; */ private long size; /** - * Where in the file to seek to when reading (default is zero bytes, the - * start of the file). + * Where in the file to seek to when reading (default is zero bytes, the start + * of the file). 
*/ private long offset; - + private String mimeType; private String fileName; private String varHeader; @@ -215,8 +227,8 @@ public boolean canWrite() { private String swiftContainerName; private boolean isLocalFile = false; - /*private boolean isRemoteAccess = false;*/ - /*private boolean isHttpAccess = false;*/ + /* private boolean isRemoteAccess = false; */ + /* private boolean isHttpAccess = false; */ private boolean noVarHeader = false; // For remote downloads: @@ -229,13 +241,14 @@ public boolean canWrite() { private String remoteUrl; protected String remoteStoreName = null; protected URL remoteStoreUrl = null; - + // For HTTP-based downloads: - /*private GetMethod method = null; - private Header[] responseHeaders;*/ + /* + * private GetMethod method = null; private Header[] responseHeaders; + */ // getters: - + public Channel getChannel() throws IOException { return channel; } @@ -255,16 +268,15 @@ public ReadableByteChannel getReadChannel() throws IOException { return (ReadableByteChannel) channel; } - - public DvObject getDvObject() - { + + public DvObject getDvObject() { return dvObject; } - + public DataFile getDataFile() { return (DataFile) dvObject; } - + public Dataset getDataset() { return (Dataset) dvObject; } @@ -277,9 +289,9 @@ public DataAccessRequest getRequest() { return req; } - /*public int getStatus() { - return status; - }*/ + /* + * public int getStatus() { return status; } + */ public long getSize() { return size; @@ -292,9 +304,9 @@ public long getOffset() { public InputStream getInputStream() throws IOException { return in; } - + public OutputStream getOutputStream() throws IOException { - return out; + return out; } public String getMimeType() { @@ -317,23 +329,23 @@ public String getRemoteUrl() { return remoteUrl; } - public String getTemporarySwiftUrl(){ + public String getTemporarySwiftUrl() { return temporarySwiftUrl; } - + public String getTempUrlExpiry() { return tempUrlExpiry; } - + public String getTempUrlSignature() { return tempUrlSignature; } - + public String getSwiftFileName() { return swiftFileName; } - public String getSwiftContainerName(){ + public String getSwiftContainerName() { return swiftContainerName; } @@ -344,34 +356,32 @@ public String getRemoteStoreName() { public URL getRemoteStoreUrl() { return remoteStoreUrl; } - - /*public GetMethod getHTTPMethod() { - return method; - } - public Header[] getResponseHeaders() { - return responseHeaders; - }*/ + /* + * public GetMethod getHTTPMethod() { return method; } + * + * public Header[] getResponseHeaders() { return responseHeaders; } + */ public boolean isLocalFile() { return isLocalFile; } - - // "Direct Access" StorageIO is used to access a physical storage - // location not associated with any dvObject. (For example, when we - // are deleting a physical file left behind by a DataFile that's - // already been deleted from the database). + + // "Direct Access" StorageIO is used to access a physical storage + // location not associated with any dvObject. (For example, when we + // are deleting a physical file left behind by a DataFile that's + // already been deleted from the database). 
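As the comment above says, a direct-access StorageIO has no dvObject behind it and is obtained straight from a storage location string; in this patch it is also the only mode in which the overlay drivers permit a permanent delete(). A brief, hypothetical usage sketch (location string and driver id are made up; getDirectStorageIO and delete() are the calls appearing elsewhere in this patch):

// Clean up a leftover physical object that no longer has a DataFile row behind it.
void deleteOrphanedObject() throws IOException {
    String location = "s3base://my-bucket/10.5072/FK2/ABC123/18e0a9b3c7d-5f2e4a1b6c8d";
    StorageIO<DvObject> orphan = DataAccess.getDirectStorageIO(location);
    // dvObject is null here, so isDirectAccess() returns true and the delete is permitted
    orphan.delete();
}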
public boolean isDirectAccess() { - return dvObject == null; + return dvObject == null; } - /*public boolean isRemoteAccess() { - return isRemoteAccess; - }*/ + /* + * public boolean isRemoteAccess() { return isRemoteAccess; } + */ - /*public boolean isHttpAccess() { - return isHttpAccess; - }*/ + /* + * public boolean isHttpAccess() { return isHttpAccess; } + */ public boolean isDownloadSupported() { return isDownloadSupported; @@ -398,9 +408,9 @@ public void setRequest(DataAccessRequest dar) { req = dar; } - /*public void setStatus(int s) { - status = s; - }*/ + /* + * public void setStatus(int s) { status = s; } + */ public void setSize(long s) { size = s; @@ -421,11 +431,11 @@ public void setOffset(long offset) throws IOException { public void setInputStream(InputStream is) { in = is; } - + public void setOutputStream(OutputStream os) { - out = os; - } - + out = os; + } + public void setChannel(Channel c) { channel = c; } @@ -450,45 +460,46 @@ public void setRemoteUrl(String u) { remoteUrl = u; } - public void setTemporarySwiftUrl(String u){ + public void setTemporarySwiftUrl(String u) { temporarySwiftUrl = u; } - - public void setTempUrlExpiry(Long u){ + + public void setTempUrlExpiry(Long u) { tempUrlExpiry = String.valueOf(u); } - + public void setSwiftFileName(String u) { swiftFileName = u; } - - public void setTempUrlSignature(String u){ + + public void setTempUrlSignature(String u) { tempUrlSignature = u; } - public void setSwiftContainerName(String u){ + public void setSwiftContainerName(String u) { swiftContainerName = u; } - /*public void setHTTPMethod(GetMethod hm) { - method = hm; - }*/ + /* + * public void setHTTPMethod(GetMethod hm) { method = hm; } + */ - /*public void setResponseHeaders(Header[] headers) { - responseHeaders = headers; - }*/ + /* + * public void setResponseHeaders(Header[] headers) { responseHeaders = headers; + * } + */ public void setIsLocalFile(boolean f) { isLocalFile = f; } - /*public void setIsRemoteAccess(boolean r) { - isRemoteAccess = r; - }*/ + /* + * public void setIsRemoteAccess(boolean r) { isRemoteAccess = r; } + */ - /*public void setIsHttpAccess(boolean h) { - isHttpAccess = h; - }*/ + /* + * public void setIsHttpAccess(boolean h) { isHttpAccess = h; } + */ public void setIsDownloadSupported(boolean d) { isDownloadSupported = d; @@ -506,12 +517,11 @@ public void setNoVarHeader(boolean nvh) { noVarHeader = nvh; } - // connection management methods: - /*public void releaseConnection() { - if (method != null) { - method.releaseConnection(); - } - }*/ + // connection management methods: + /* + * public void releaseConnection() { if (method != null) { + * method.releaseConnection(); } } + */ public void closeInputStream() { if (in != null) { @@ -528,7 +538,7 @@ public void closeInputStream() { } } } - + public String generateVariableHeader(List dvs) { String varHeader = null; @@ -571,14 +581,14 @@ protected boolean isWriteAccessRequested(DataAccessOption... options) throws IOE return false; } - public boolean isBelowIngestSizeLimit() { - long limit = Long.parseLong(System.getProperty("dataverse.files." 
+ this.driverId + ".ingestsizelimit", "-1")); - if(limit>0 && getSize()>limit) { - return false; - } else { - return true; - } - } + public boolean isBelowIngestSizeLimit() { + long limit = Long.parseLong(getConfigParam(INGEST_SIZE_LIMIT, "-1")); + if (limit > 0 && getSize() > limit) { + return false; + } else { + return true; + } + } public boolean downloadRedirectEnabled() { return false; @@ -587,35 +597,43 @@ public boolean downloadRedirectEnabled() { public boolean downloadRedirectEnabled(String auxObjectTag) { return false; } - - public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { + + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) + throws IOException { throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); } - public static boolean isPublicStore(String driverId) { - //Read once and cache - if(!driverPublicAccessMap.containsKey(driverId)) { - driverPublicAccessMap.put(driverId, Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".public"))); + // Read once and cache + if (!driverPublicAccessMap.containsKey(driverId)) { + driverPublicAccessMap.put(driverId, + Boolean.parseBoolean(getConfigParamForDriver(driverId, PUBLIC))); } return driverPublicAccessMap.get(driverId); } - + public static String getDriverPrefix(String driverId) { - return driverId+ DataAccess.SEPARATOR; + return driverId + DataAccess.SEPARATOR; } - + public static boolean isDirectUploadEnabled(String driverId) { - return Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect")); + return (getConfigParamForDriver(driverId, TYPE).equals(DataAccess.S3) + && Boolean.parseBoolean(getConfigParamForDriver(driverId, UPLOAD_REDIRECT))) + || Boolean.parseBoolean(getConfigParamForDriver(driverId, UPLOAD_OUT_OF_BAND)); + } + + //True by default, Stores (e.g. RemoteOverlay, Globus) can set this false to stop attempts to read bytes + public static boolean isDataverseAccessible(String driverId) { + return (true && !Boolean.parseBoolean(getConfigParamForDriver(driverId, DATAVERSE_INACCESSIBLE))); } - //Check that storageIdentifier is consistent with store's config - //False will prevent direct uploads - protected static boolean isValidIdentifier(String driverId, String storageId) { + // Check that storageIdentifier is consistent with store's config + // False will prevent direct uploads + static boolean isValidIdentifier(String driverId, String storageId) { return false; } - - //Utility to verify the standard UUID pattern for stored files. + + // Utility to verify the standard UUID pattern for stored files. protected static boolean usesStandardNamePattern(String identifier) { Pattern r = Pattern.compile("^[a-f,0-9]{11}-[a-f,0-9]{12}$"); @@ -625,4 +643,44 @@ protected static boolean usesStandardNamePattern(String identifier) { public abstract List cleanUp(Predicate filter, boolean dryRun) throws IOException; + /** + * A storage-type-specific mechanism for retrieving the size of a file. Intended + * primarily as a way to get the size before it has been recorded in the + * database, e.g. during direct/out-of-band transfers but could be useful to + * check the db values. 
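The refactoring above replaces ad-hoc System.getProperty() calls with the getConfigParam()/getConfigParamForDriver() helpers (defined just below), which all resolve to dataverse.files.<driverId>.<parameter> JVM options. A minimal, standalone sketch of that lookup; the driver id "s3" is only an example, and the ingestsizelimit key is taken from the removed code above.

// Standalone sketch (not part of this patch): how a per-store JVM option such as
// -Ddataverse.files.s3.ingestsizelimit=2000000000 is resolved into a value.
public final class DriverConfigLookupSketch {

    // Mirrors the helper added to StorageIO below.
    static String getConfigParamForDriver(String driverId, String parameterName, String defaultValue) {
        return System.getProperty("dataverse.files." + driverId + "." + parameterName, defaultValue);
    }

    public static void main(String[] args) {
        // Normally set on the app server command line; set here so the sketch is runnable.
        System.setProperty("dataverse.files.s3.ingestsizelimit", "2000000000");
        long limit = Long.parseLong(getConfigParamForDriver("s3", "ingestsizelimit", "-1"));
        System.out.println("Ingest size limit for the s3 store: " + limit + " bytes");
    }
}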
+ * + * @return file size in bytes + * @throws IOException + */ + public abstract long retrieveSizeFromMedia() throws IOException; + + + /* Convenience methods to get a driver-specific parameter + * + * - with or without a default + * - static or per object + * + * @param parameterName + * @return the parameter value + */ + + protected String getConfigParam(String parameterName) { + return getConfigParam(parameterName, null); + } + + protected String getConfigParam(String parameterName, String defaultValue) { + return getConfigParamForDriver(this.driverId, parameterName, defaultValue); + } + + protected static String getConfigParamForDriver(String driverId, String parameterName) { + return getConfigParamForDriver(driverId, parameterName, null); + } + protected static String getConfigParamForDriver(String driverId, String parameterName, String defaultValue) { + return System.getProperty("dataverse.files." + driverId + "." + parameterName, defaultValue); + } + + public static String getNewIdentifier(String driverId) { + return driverId + DataAccess.SEPARATOR + FileUtil.generateStorageIdentifier(); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 6c84009de3e..717f46ffd60 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -50,6 +50,17 @@ public class SwiftAccessIO extends StorageIO { private String swiftLocation; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO"); + private static final String IS_PUBLIC_CONTAINER = "isPublicContainer"; + private static final String FOLDER_PATH_SEPARATOR = "folderPathSeparator"; + private static final String DEFAULT_ENDPOINT = "defaultEndpoint"; + private static final String TEMPORARY_URL_EXPIRY_TIME = "temporaryUrlExpiryTime"; + private static final String AUTH_URL = "authUrl"; + private static final String USERNAME = "username"; + private static final String PASSWORD = "password"; + private static final String TENANT = "tenant"; + private static final String AUTH_TYPE = "authType"; + private static final String HASH_KEY = "hashKey"; + private static final String ENDPOINT = "endpoint"; public SwiftAccessIO() { //Partially functional StorageIO object - constructor only for testing @@ -70,10 +81,10 @@ public SwiftAccessIO(String swiftLocation, String driverId) { } private void readSettings() { - isPublicContainer = Boolean.parseBoolean(System.getProperty("dataverse.files." + this.driverId + ".isPublicContainer", "true")); - swiftFolderPathSeparator = System.getProperty("dataverse.files." + this.driverId + ".folderPathSeparator", "_"); - swiftDefaultEndpoint = System.getProperty("dataverse.files." + this.driverId + ".defaultEndpoint"); - tempUrlExpires = Integer.parseInt(System.getProperty("dataverse.files." + this.driverId + ".temporaryUrlExpiryTime", "60")); + isPublicContainer = Boolean.parseBoolean(getConfigParam(IS_PUBLIC_CONTAINER, "true")); + swiftFolderPathSeparator = getConfigParam(FOLDER_PATH_SEPARATOR, "_"); + swiftDefaultEndpoint = getConfigParam(DEFAULT_ENDPOINT); + tempUrlExpires = Integer.parseInt(getConfigParam(TEMPORARY_URL_EXPIRY_TIME, "60")); } @@ -131,7 +142,8 @@ public void open(DataAccessOption... 
options) throws IOException { && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); @@ -740,12 +752,12 @@ private StoredObject openSwiftAuxFile(boolean writeAccess, String auxItemTag) th } Account authenticateWithSwift(String swiftEndPoint) throws IOException { - String swiftEndPointAuthUrl = System.getProperty("dataverse.files." + this.driverId + ".authUrl." + swiftEndPoint); - String swiftEndPointUsername = System.getProperty("dataverse.files." + this.driverId + ".username." + swiftEndPoint); - String swiftEndPointSecretKey = System.getProperty("dataverse.files." + this.driverId + ".password." + swiftEndPoint); - String swiftEndPointTenantName = System.getProperty("dataverse.files." + this.driverId + ".tenant." + swiftEndPoint); - String swiftEndPointAuthMethod = System.getProperty("dataverse.files." + this.driverId + ".authType." + swiftEndPoint); - String swiftEndPointTenantId = System.getProperty("dataverse.files." + this.driverId + ".tenant." + swiftEndPoint); + String swiftEndPointAuthUrl = getConfigParam(AUTH_URL + "." + swiftEndPoint); + String swiftEndPointUsername = getConfigParam(USERNAME + "." + swiftEndPoint); + String swiftEndPointSecretKey = getConfigParam(PASSWORD + "." + swiftEndPoint); + String swiftEndPointTenantName = getConfigParam(TENANT + "." + swiftEndPoint); + String swiftEndPointAuthMethod = getConfigParam(AUTH_TYPE + "." + swiftEndPoint); + String swiftEndPointTenantId = getConfigParam(TENANT + "." + swiftEndPoint); if (swiftEndPointAuthUrl == null || swiftEndPointUsername == null || swiftEndPointSecretKey == null || "".equals(swiftEndPointAuthUrl) || "".equals(swiftEndPointUsername) || "".equals(swiftEndPointSecretKey)) { @@ -814,7 +826,7 @@ private String getSwiftFileURI(StoredObject fileObject) throws IOException { private String hmac = null; public String generateTempUrlSignature(String swiftEndPoint, String containerName, String objectName, int duration) throws IOException { if (hmac == null || isExpiryExpired(generateTempUrlExpiry(duration, System.currentTimeMillis()), duration, System.currentTimeMillis())) { - String secretKey = System.getProperty("dataverse.files." + this.driverId + ".hashKey." + swiftEndPoint); + String secretKey = getConfigParam(HASH_KEY + "." + swiftEndPoint); if (secretKey == null) { throw new IOException("Please input a hash key under dataverse.files." + this.driverId + ".hashKey." + swiftEndPoint); } @@ -841,7 +853,7 @@ public long generateTempUrlExpiry(int duration, long currentTime) { private String temporaryUrl = null; private String generateTemporarySwiftUrl(String swiftEndPoint, String containerName, String objectName, int duration) throws IOException { - String baseUrl = System.getProperty("dataverse.files." + this.driverId + ".endpoint." + swiftEndPoint); + String baseUrl = getConfigParam(ENDPOINT + "." 
+ swiftEndPoint); String path = "/v1/" + containerName + "/" + objectName; if (temporaryUrl == null || isExpiryExpired(generateTempUrlExpiry(duration, System.currentTimeMillis()), duration, System.currentTimeMillis())) { @@ -954,4 +966,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE } return toDelete; } + + @Override + public long retrieveSizeFromMedia() throws IOException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java index 782f7f3a52d..c369010c8cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java @@ -60,305 +60,26 @@ public class TabularSubsetGenerator implements SubsetGenerator { - private static Logger dbgLog = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName()); + private static Logger logger = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName()); - private static int COLUMN_TYPE_STRING = 1; - private static int COLUMN_TYPE_LONG = 2; - private static int COLUMN_TYPE_DOUBLE = 3; - private static int COLUMN_TYPE_FLOAT = 4; - - private static int MAX_COLUMN_BUFFER = 8192; - - private FileChannel fileChannel = null; - - private int varcount; - private int casecount; - private int subsetcount; - - private byte[][] columnEntries = null; - - - private ByteBuffer[] columnByteBuffers; - private int[] columnBufferSizes; - private int[] columnBufferOffsets; - - private long[] columnStartOffsets; - private long[] columnTotalOffsets; - private long[] columnTotalLengths; - - public TabularSubsetGenerator() { - - } - - public TabularSubsetGenerator (DataFile datafile, List variables) throws IOException { - if (!datafile.isTabularData()) { - throw new IOException("DataFile is not tabular data."); - } - - setVarCount(datafile.getDataTable().getVarQuantity().intValue()); - setCaseCount(datafile.getDataTable().getCaseQuantity().intValue()); - - - - StorageIO dataAccess = datafile.getStorageIO(); - if (!dataAccess.isLocalFile()) { - throw new IOException("Subsetting is supported on local files only!"); - } - - //File tabfile = datafile.getFileSystemLocation().toFile(); - File tabfile = dataAccess.getFileSystemPath().toFile(); + //private static int MAX_COLUMN_BUFFER = 8192; - File rotatedImageFile = getRotatedImage(tabfile, getVarCount(), getCaseCount()); - long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, getVarCount(), getCaseCount()); - - fileChannel = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ)); - - if (variables == null || variables.size() < 1 || variables.size() > getVarCount()) { - throw new IOException("Illegal number of variables in the subset request"); - } - - subsetcount = variables.size(); - columnTotalOffsets = new long[subsetcount]; - columnTotalLengths = new long[subsetcount]; - columnByteBuffers = new ByteBuffer[subsetcount]; - - + public TabularSubsetGenerator() { - if (subsetcount == 1) { - if (!datafile.getDataTable().getId().equals(variables.get(0).getDataTable().getId())) { - throw new IOException("Variable in the subset request does not belong to the datafile."); - } - dbgLog.fine("single variable subset; setting fileChannel position to "+extractColumnOffset(columnEndOffsets, 
variables.get(0).getFileOrder())); - fileChannel.position(extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder())); - columnTotalLengths[0] = extractColumnLength(columnEndOffsets, variables.get(0).getFileOrder()); - columnTotalOffsets[0] = 0; - } else { - columnEntries = new byte[subsetcount][]; - - columnBufferSizes = new int[subsetcount]; - columnBufferOffsets = new int[subsetcount]; - columnStartOffsets = new long[subsetcount]; - - int i = 0; - for (DataVariable var : variables) { - if (!datafile.getDataTable().getId().equals(var.getDataTable().getId())) { - throw new IOException("Variable in the subset request does not belong to the datafile."); - } - columnByteBuffers[i] = ByteBuffer.allocate(MAX_COLUMN_BUFFER); - columnTotalLengths[i] = extractColumnLength(columnEndOffsets, var.getFileOrder()); - columnStartOffsets[i] = extractColumnOffset(columnEndOffsets, var.getFileOrder()); - if (columnTotalLengths[i] < MAX_COLUMN_BUFFER) { - columnByteBuffers[i].limit((int)columnTotalLengths[i]); - } - fileChannel.position(columnStartOffsets[i]); - columnBufferSizes[i] = fileChannel.read(columnByteBuffers[i]); - columnBufferOffsets[i] = 0; - columnTotalOffsets[i] = columnBufferSizes[i]; - i++; - } - } - } - - private int getVarCount() { - return varcount; } - private void setVarCount(int varcount) { - this.varcount = varcount; - } - - private int getCaseCount() { - return casecount; - } - - private void setCaseCount(int casecount) { - this.casecount = casecount; - } - - - /* - * Note that this method operates on the *absolute* column number, i.e. - * the number of the physical column in the tabular file. This is stored - * in DataVariable.FileOrder. - * This "column number" should not be confused with the number of column - * in the subset request; a user can request any number of variable - * columns, in an order that doesn't have to follow the physical order - * of the columns in the file. - */ - private long extractColumnOffset(long[] columnEndOffsets, int column) throws IOException { - if (columnEndOffsets == null || columnEndOffsets.length <= column) { - throw new IOException("Offsets table not initialized; or column out of bounds."); - } - long columnOffset; - - if (column > 0) { - columnOffset = columnEndOffsets[column - 1]; - } else { - columnOffset = getVarCount() * 8; - } - return columnOffset; - } - - /* - * See the comment for the method above. + /** + * This class used to be much more complex. There were methods for subsetting + * from fixed-width field files; including using the optimized, "90 deg. rotated" + * versions of such files (i.e. you create a *columns-wise* copy of your data + * file in which the columns are stored sequentially, and a table of byte + * offsets of each column. You can then read individual variable columns + * for cheap; at the expense of doubling the storage size of your tabular + * data files. These methods were not used, so they were deleted (in Jan. 2024 + * prior to 6.2. + * Please consult git history if you are interested in looking at that code. 
*/ - private long extractColumnLength(long[] columnEndOffsets, int column) throws IOException { - if (columnEndOffsets == null || columnEndOffsets.length <= column) { - throw new IOException("Offsets table not initialized; or column out of bounds."); - } - long columnLength; - - if (column > 0) { - columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; - } else { - columnLength = columnEndOffsets[0] - varcount * 8; - } - - return columnLength; - } - - - private void bufferMoreColumnBytes(int column) throws IOException { - if (columnTotalOffsets[column] >= columnTotalLengths[column]) { - throw new IOException("attempt to buffer bytes past the column boundary"); - } - fileChannel.position(columnStartOffsets[column] + columnTotalOffsets[column]); - - columnByteBuffers[column].clear(); - if (columnTotalLengths[column] < columnTotalOffsets[column] + MAX_COLUMN_BUFFER) { - dbgLog.fine("Limiting the buffer to "+(columnTotalLengths[column] - columnTotalOffsets[column])+" bytes"); - columnByteBuffers[column].limit((int) (columnTotalLengths[column] - columnTotalOffsets[column])); - } - columnBufferSizes[column] = fileChannel.read(columnByteBuffers[column]); - dbgLog.fine("Read "+columnBufferSizes[column]+" bytes for subset column "+column); - columnBufferOffsets[column] = 0; - columnTotalOffsets[column] += columnBufferSizes[column]; - } - - public byte[] readColumnEntryBytes(int column) { - return readColumnEntryBytes(column, true); - } - - - public byte[] readColumnEntryBytes(int column, boolean addTabs) { - byte[] leftover = null; - byte[] ret = null; - - if (columnBufferOffsets[column] >= columnBufferSizes[column]) { - try { - bufferMoreColumnBytes(column); - if (columnBufferSizes[column] < 1) { - return null; - } - } catch (IOException ioe) { - return null; - } - } - - int byteindex = columnBufferOffsets[column]; - try { - while (columnByteBuffers[column].array()[byteindex] != '\n') { - byteindex++; - if (byteindex == columnBufferSizes[column]) { - // save the leftover: - if (leftover == null) { - leftover = new byte[columnBufferSizes[column] - columnBufferOffsets[column]]; - System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], leftover, 0, columnBufferSizes[column] - columnBufferOffsets[column]); - } else { - byte[] merged = new byte[leftover.length + columnBufferSizes[column]]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnByteBuffers[column].array(), 0, merged, leftover.length, columnBufferSizes[column]); - leftover = merged; - merged = null; - } - // read more bytes: - bufferMoreColumnBytes(column); - if (columnBufferSizes[column] < 1) { - return null; - } - byteindex = 0; - } - } - - // presumably, we have found our '\n': - if (leftover == null) { - ret = new byte[byteindex - columnBufferOffsets[column] + 1]; - System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], ret, 0, byteindex - columnBufferOffsets[column] + 1); - } else { - ret = new byte[leftover.length + byteindex + 1]; - System.arraycopy(leftover, 0, ret, 0, leftover.length); - System.arraycopy(columnByteBuffers[column].array(), 0, ret, leftover.length, byteindex + 1); - } - - } catch (IOException ioe) { - return null; - } - - columnBufferOffsets[column] = (byteindex + 1); - - if (column < columnBufferOffsets.length - 1) { - ret[ret.length - 1] = '\t'; - } - return ret; - } - - public int readSingleColumnSubset(byte[] buffer) throws IOException { - if (columnTotalOffsets[0] == columnTotalLengths[0]) { - return -1; - } - 
- if (columnByteBuffers[0] == null) { - dbgLog.fine("allocating single column subset buffer."); - columnByteBuffers[0] = ByteBuffer.allocate(buffer.length); - } - - int bytesread = fileChannel.read(columnByteBuffers[0]); - dbgLog.fine("single column subset: read "+bytesread+" bytes."); - if (columnTotalOffsets[0] + bytesread > columnTotalLengths[0]) { - bytesread = (int)(columnTotalLengths[0] - columnTotalOffsets[0]); - } - System.arraycopy(columnByteBuffers[0].array(), 0, buffer, 0, bytesread); - - columnTotalOffsets[0] += bytesread; - columnByteBuffers[0].clear(); - return bytesread > 0 ? bytesread : -1; - } - - - public byte[] readSubsetLineBytes() throws IOException { - byte[] ret = null; - int total = 0; - for (int i = 0; i < subsetcount; i++) { - columnEntries[i] = readColumnEntryBytes(i); - if (columnEntries[i] == null) { - throw new IOException("Failed to read subset line entry"); - } - total += columnEntries[i].length; - } - - ret = new byte[total]; - int offset = 0; - for (int i = 0; i < subsetcount; i++) { - System.arraycopy(columnEntries[i], 0, ret, offset, columnEntries[i].length); - offset += columnEntries[i].length; - } - dbgLog.fine("line: "+new String(ret)); - return ret; - } - - - public void close() { - if (fileChannel != null) { - try { - fileChannel.close(); - } catch (IOException ioe) { - // don't care. - } - } - } - public void subsetFile(String infile, String outfile, List columns, Long numCases) { subsetFile(infile, outfile, columns, numCases, "\t"); } @@ -411,11 +132,15 @@ public void subsetFile(InputStream in, String outfile, List columns, Lo * files, OK to use on small files: */ - public static Double[] subsetDoubleVector(InputStream in, int column, int numCases) { + public static Double[] subsetDoubleVector(InputStream in, int column, int numCases, boolean skipHeader) { Double[] retVector = new Double[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -463,11 +188,15 @@ public static Double[] subsetDoubleVector(InputStream in, int column, int numCas * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. */ - public static Float[] subsetFloatVector(InputStream in, int column, int numCases) { + public static Float[] subsetFloatVector(InputStream in, int column, int numCases, boolean skipHeader) { Float[] retVector = new Float[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -513,11 +242,15 @@ public static Float[] subsetFloatVector(InputStream in, int column, int numCases * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. 
*/ - public static Long[] subsetLongVector(InputStream in, int column, int numCases) { + public static Long[] subsetLongVector(InputStream in, int column, int numCases, boolean skipHeader) { Long[] retVector = new Long[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -549,11 +282,15 @@ public static Long[] subsetLongVector(InputStream in, int column, int numCases) * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. */ - public static String[] subsetStringVector(InputStream in, int column, int numCases) { + public static String[] subsetStringVector(InputStream in, int column, int numCases, boolean skipHeader) { String[] retVector = new String[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -621,819 +358,10 @@ public static String[] subsetStringVector(InputStream in, int column, int numCas } - /* - * Straightforward method for subsetting a tab-delimited data file, extracting - * all the columns representing continuous variables and returning them as - * a 2-dimensional array of Doubles; - * Inefficient on large files, OK to use on small ones. - */ - public static Double[][] subsetDoubleVectors(InputStream in, Set columns, int numCases) throws IOException { - Double[][] retVector = new Double[columns.size()][numCases]; - try (Scanner scanner = new Scanner(in)) { - scanner.useDelimiter("\\n"); - - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - int j = 0; - for (Integer i : columns) { - try { - // TODO: verify that NaN and +-Inf are going to be - // handled correctly here! -- L.A. - // NO, "+-Inf" is not handled correctly; see the - // comment further down below. - retVector[j][caseIndex] = new Double(line[i]); - } catch (NumberFormatException ex) { - retVector[j][caseIndex] = null; // missing value - } - j++; - } - } else { - throw new IOException("Tab file has fewer rows than the stored number of cases!"); - } - } - - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); - } - tailIndex++; - } - - } - return retVector; - - } - - public String[] subsetStringVector(DataFile datafile, int column) throws IOException { - return (String[])subsetObjectVector(datafile, column, COLUMN_TYPE_STRING); - } - - public Double[] subsetDoubleVector(DataFile datafile, int column) throws IOException { - return (Double[])subsetObjectVector(datafile, column, COLUMN_TYPE_DOUBLE); - } - - public Long[] subsetLongVector(DataFile datafile, int column) throws IOException { - return (Long[])subsetObjectVector(datafile, column, COLUMN_TYPE_LONG); - } - - // Float methods are temporary; - // In normal operations we'll be treating all the floating point types as - // doubles. I need to be able to handle floats for some 4.0 vs 3.* ingest - // tests. -- L.A. 
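The new skipHeader flag on the subset*Vector() helpers above accounts for tab files that store the variable-name header as their first line. A hedged usage sketch (the file name and case count are made up; only the method signature comes from this patch):

import edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

// Illustrative only: read column 2 of a tab-delimited file whose first line is
// the variable-name header, so it must be skipped before reading 100 cases.
public final class SubsetVectorSketch {

    public static void main(String[] args) throws IOException {
        try (InputStream in = new FileInputStream("/tmp/example.tab")) {
            Double[] column = TabularSubsetGenerator.subsetDoubleVector(in, 2, 100, true);
            System.out.println("Read " + column.length + " values");
        }
    }
}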
- - public Float[] subsetFloatVector(DataFile datafile, int column) throws IOException { - return (Float[])subsetObjectVector(datafile, column, COLUMN_TYPE_FLOAT); - } - - public String[] subsetStringVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (String[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_STRING); - } - - public Double[] subsetDoubleVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Double[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_DOUBLE); - } - - public Long[] subsetLongVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Long[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_LONG); - } - - public Float[] subsetFloatVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Float[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_FLOAT); - } - - public Object[] subsetObjectVector(DataFile dataFile, int column, int columntype) throws IOException { - if (!dataFile.isTabularData()) { - throw new IOException("DataFile is not tabular data."); - } - - int varcount = dataFile.getDataTable().getVarQuantity().intValue(); - int casecount = dataFile.getDataTable().getCaseQuantity().intValue(); - - if (column >= varcount) { - throw new IOException("Column "+column+" is out of bounds."); - } - - StorageIO dataAccess = dataFile.getStorageIO(); - if (!dataAccess.isLocalFile()) { - throw new IOException("Subsetting is supported on local files only!"); - } - - //File tabfile = datafile.getFileSystemLocation().toFile(); - File tabfile = dataAccess.getFileSystemPath().toFile(); - - if (columntype == COLUMN_TYPE_STRING) { - String filename = dataFile.getFileMetadata().getLabel(); - if (filename != null) { - filename = filename.replaceFirst("^_", ""); - Integer fnumvalue = null; - try { - fnumvalue = new Integer(filename); - } catch (Exception ex){ - fnumvalue = null; - } - if (fnumvalue != null) { - //if ((fnumvalue.intValue() < 112497)) { // && (fnumvalue.intValue() > 60015)) { - if ((fnumvalue.intValue() < 111931)) { // && (fnumvalue.intValue() > 60015)) { - if (!(fnumvalue.intValue() == 60007 - || fnumvalue.intValue() == 59997 - || fnumvalue.intValue() == 60015 - || fnumvalue.intValue() == 59948 - || fnumvalue.intValue() == 60012 - || fnumvalue.intValue() == 52585 - || fnumvalue.intValue() == 60005 - || fnumvalue.intValue() == 60002 - || fnumvalue.intValue() == 59954 - || fnumvalue.intValue() == 60008 - || fnumvalue.intValue() == 54972 - || fnumvalue.intValue() == 55010 - || fnumvalue.intValue() == 54996 - || fnumvalue.intValue() == 53527 - || fnumvalue.intValue() == 53546 - || fnumvalue.intValue() == 55002 - || fnumvalue.intValue() == 55006 - || fnumvalue.intValue() == 54998 - || fnumvalue.intValue() == 52552 - // SPSS/SAV cases with similar issue - compat mode must be disabled - //|| fnumvalue.intValue() == 101826 // temporary - tricky file with accents and v. 16... - || fnumvalue.intValue() == 54618 // another SAV file, with long strings... - || fnumvalue.intValue() == 54619 // [same] - || fnumvalue.intValue() == 57983 - || fnumvalue.intValue() == 58262 - || fnumvalue.intValue() == 58288 - || fnumvalue.intValue() == 58656 - || fnumvalue.intValue() == 59144 - // || fnumvalue.intValue() == 69626 [nope!] 
- )) { - dbgLog.info("\"Old\" file name detected; using \"compatibility mode\" for a character vector subset;"); - return subsetObjectVector(tabfile, column, varcount, casecount, columntype, true); - } - } - } - } + private static void skipFirstLine(Scanner scanner) { + if (!scanner.hasNext()) { + throw new RuntimeException("Failed to read the variable name header line from the tab-delimited file!"); } - - return subsetObjectVector(tabfile, column, varcount, casecount, columntype); - } - - public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype) throws IOException { - return subsetObjectVector(tabfile, column, varcount, casecount, columntype, false); - } - - - - public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype, boolean compatmode) throws IOException { - - Object[] retVector = null; - - boolean isString = false; - boolean isDouble = false; - boolean isLong = false; - boolean isFloat = false; - - //Locale loc = new Locale("en", "US"); - - if (columntype == COLUMN_TYPE_STRING) { - isString = true; - retVector = new String[casecount]; - } else if (columntype == COLUMN_TYPE_DOUBLE) { - isDouble = true; - retVector = new Double[casecount]; - } else if (columntype == COLUMN_TYPE_LONG) { - isLong = true; - retVector = new Long[casecount]; - } else if (columntype == COLUMN_TYPE_FLOAT){ - isFloat = true; - retVector = new Float[casecount]; - } else { - throw new IOException("Unsupported column type: "+columntype); - } - - File rotatedImageFile = getRotatedImage(tabfile, varcount, casecount); - long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, varcount, casecount); - long columnOffset = 0; - long columnLength = 0; - - if (column > 0) { - columnOffset = columnEndOffsets[column - 1]; - columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; - } else { - columnOffset = varcount * 8; - columnLength = columnEndOffsets[0] - varcount * 8; - } - int caseindex = 0; - - try (FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), - StandardOpenOption.READ))) { - fc.position(columnOffset); - int MAX_COLUMN_BUFFER = 8192; - - ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER); - - if (columnLength < MAX_COLUMN_BUFFER) { - in.limit((int) (columnLength)); - } - - long bytesRead = 0; - long bytesReadTotal = 0; - - int byteoffset = 0; - byte[] leftover = null; - - while (bytesReadTotal < columnLength) { - bytesRead = fc.read(in); - byte[] columnBytes = in.array(); - int bytecount = 0; - - while (bytecount < bytesRead) { - if (columnBytes[bytecount] == '\n') { - /* - String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); - - if (leftover != null) { - String leftoverString = new String (leftover, "UTF8"); - token = leftoverString + token; - leftover = null; - } - */ - /* - * Note that the way I was doing it at first - above - - * was not quite the correct way - because I was creating UTF8 - * strings from the leftover bytes, and the bytes in the - * current buffer *separately*; which means, if a multi-byte - * UTF8 character got split in the middle between one buffer - * and the next, both chunks of it would become junk - * characters, on each side! - * The correct way of doing it, of course, is to create a - * merged byte buffer, and then turn it into a UTF8 string. - * -- L.A. 
4.0 - */ - String token = null; - - if (leftover == null) { - token = new String(columnBytes, byteoffset, bytecount - byteoffset, "UTF8"); - } else { - byte[] merged = new byte[leftover.length + bytecount - byteoffset]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount - byteoffset); - token = new String(merged, "UTF8"); - leftover = null; - merged = null; - } - - if (isString) { - if ("".equals(token)) { - // An empty string is a string missing value! - // An empty string in quotes is an empty string! - retVector[caseindex] = null; - } else { - // Strip the outer quotes: - token = token.replaceFirst("^\\\"", ""); - token = token.replaceFirst("\\\"$", ""); - - // We need to restore the special characters that - // are stored in tab files escaped - quotes, new lines - // and tabs. Before we do that however, we need to - // take care of any escaped backslashes stored in - // the tab file. I.e., "foo\t" should be transformed - // to "foo"; but "foo\\t" should be transformed - // to "foo\t". This way new lines and tabs that were - // already escaped in the original data are not - // going to be transformed to unescaped tab and - // new line characters! - - String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2); - - // (note that it's important to use the 2-argument version - // of String.split(), and set the limit argument to a - // negative value; otherwise any trailing backslashes - // are lost.) - - for (int i = 0; i < splitTokens.length; i++) { - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); - } - // TODO: - // Make (some of?) the above optional; for ex., we - // do need to restore the newlines when calculating UNFs; - // But if we are subsetting these vectors in order to - // create a new tab-delimited file, they will - // actually break things! -- L.A. Jul. 28 2014 - - token = StringUtils.join(splitTokens, '\\'); - - // "compatibility mode" - a hack, to be able to produce - // unfs identical to those produced by the "early" - // unf5 jar; will be removed in production 4.0. - // -- L.A. (TODO: ...) - if (compatmode && !"".equals(token)) { - if (token.length() > 128) { - if ("".equals(token.trim())) { - // don't ask... - token = token.substring(0, 129); - } else { - token = token.substring(0, 128); - // token = String.format(loc, "%.128s", token); - token = token.trim(); - // dbgLog.info("formatted and trimmed: "+token); - } - } else { - if ("".equals(token.trim())) { - // again, don't ask; - // - this replicates some bugginness - // that happens inside unf5; - token = "null"; - } else { - token = token.trim(); - } - } - } - - retVector[caseindex] = token; - } - } else if (isDouble) { - try { - // TODO: verify that NaN and +-Inf are - // handled correctly here! -- L.A. - // Verified: new Double("nan") works correctly, - // resulting in Double.NaN; - // Double("[+-]Inf") doesn't work however; - // (the constructor appears to be expecting it - // to be spelled as "Infinity", "-Infinity", etc. 
- if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Double(token); - } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for " + token + " as Double"); - - retVector[caseindex] = null; // missing value - // TODO: ? - } - } else if (isLong) { - try { - retVector[caseindex] = new Long(token); - } catch (NumberFormatException ex) { - retVector[caseindex] = null; // assume missing value - } - } else if (isFloat) { - try { - if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Float(token); - } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for " + token + " as Float"); - retVector[caseindex] = null; // assume missing value (TODO: ?) - } - } - caseindex++; - - if (bytecount == bytesRead - 1) { - byteoffset = 0; - } else { - byteoffset = bytecount + 1; - } - } else { - if (bytecount == bytesRead - 1) { - // We've reached the end of the buffer; - // This means we'll save whatever unused bytes left in - // it - i.e., the bytes between the last new line - // encountered and the end - in the leftover buffer. - - // *EXCEPT*, there may be a case of a very long String - // that is actually longer than MAX_COLUMN_BUFFER, in - // which case it is possible that we've read through - // an entire buffer of bytes without finding any - // new lines... in this case we may need to add this - // entire byte buffer to an already existing leftover - // buffer! 
- if (leftover == null) { - leftover = new byte[(int) bytesRead - byteoffset]; - System.arraycopy(columnBytes, byteoffset, leftover, 0, (int) bytesRead - byteoffset); - } else { - if (byteoffset != 0) { - throw new IOException("Reached the end of the byte buffer, with some leftover left from the last read; yet the offset is not zero!"); - } - byte[] merged = new byte[leftover.length + (int) bytesRead]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int) bytesRead); - // leftover = null; - leftover = merged; - merged = null; - } - byteoffset = 0; - - } - } - bytecount++; - } - - bytesReadTotal += bytesRead; - in.clear(); - if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) { - in.limit((int) (columnLength - bytesReadTotal)); - } - } - - } - - if (caseindex != casecount) { - throw new IOException("Faile to read "+casecount+" tokens for column "+column); - //System.out.println("read "+caseindex+" tokens instead of expected "+casecount+"."); - } - - return retVector; - } - - private long[] extractColumnOffsets (File rotatedImageFile, int varcount, int casecount) throws IOException { - long[] byteOffsets = new long[varcount]; - - try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile))) { - - byte[] offsetHeader = new byte[varcount * 8]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException("Could not read " + varcount * 8 + " header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex * 8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - // System.out.println(byteOffsets[varindex]); - } - - } - - return byteOffsets; - } - - private File getRotatedImage(File tabfile, int varcount, int casecount) throws IOException { - String fileName = tabfile.getAbsolutePath(); - String rotatedImageFileName = fileName + ".90d"; - File rotatedImageFile = new File(rotatedImageFileName); - if (rotatedImageFile.exists()) { - //System.out.println("Image already exists!"); - return rotatedImageFile; - } - - return generateRotatedImage(tabfile, varcount, casecount); - - } - - private File generateRotatedImage (File tabfile, int varcount, int casecount) throws IOException { - // TODO: throw exceptions if bad file, zero varcount, etc. ... - - String fileName = tabfile.getAbsolutePath(); - String rotatedImageFileName = fileName + ".90d"; - - int MAX_OUTPUT_STREAMS = 32; - int MAX_BUFFERED_BYTES = 10 * 1024 * 1024; // 10 MB - for now? - int MAX_COLUMN_BUFFER = 8 * 1024; - - // offsetHeader will contain the byte offsets of the individual column - // vectors in the final rotated image file - byte[] offsetHeader = new byte[varcount * 8]; - int[] bufferedSizes = new int[varcount]; - long[] cachedfileSizes = new long[varcount]; - File[] columnTempFiles = new File[varcount]; - - for (int i = 0; i < varcount; i++) { - bufferedSizes[i] = 0; - cachedfileSizes[i] = 0; - } - - // TODO: adjust MAX_COLUMN_BUFFER here, so that the total size is - // no more than MAX_BUFFERED_BYTES (but no less than 1024 maybe?) 
- - byte[][] bufferedColumns = new byte [varcount][MAX_COLUMN_BUFFER]; - - // read the tab-delimited file: - - try (FileInputStream tabfileStream = new FileInputStream(tabfile); - Scanner scanner = new Scanner(tabfileStream)) { - scanner.useDelimiter("\\n"); - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - // TODO: throw an exception if there are fewer tab-delimited - // tokens than the number of variables specified. - String token = ""; - int tokensize = 0; - for (int varindex = 0; varindex < varcount; varindex++) { - // TODO: figure out the safest way to convert strings to - // bytes here. Is it going to be safer to use getBytes("UTF8")? - // we are already making the assumption that the values - // in the tab file are in UTF8. -- L.A. - token = line[varindex] + "\n"; - tokensize = token.getBytes().length; - if (bufferedSizes[varindex] + tokensize > MAX_COLUMN_BUFFER) { - // fill the buffer and dump its contents into the temp file: - // (do note that there may be *several* MAX_COLUMN_BUFFERs - // worth of bytes in the token!) - - int tokenoffset = 0; - - if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) { - tokenoffset = MAX_COLUMN_BUFFER - bufferedSizes[varindex]; - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset); - } // (otherwise the buffer is already full, and we should - // simply dump it into the temp file, without adding any - // extra bytes to it) - - File bufferTempFile = columnTempFiles[varindex]; - if (bufferTempFile == null) { - bufferTempFile = File.createTempFile("columnBufferFile", "bytes"); - columnTempFiles[varindex] = bufferTempFile; - } - - // *append* the contents of the buffer to the end of the - // temp file, if already exists: - try (BufferedOutputStream outputStream = new BufferedOutputStream( - new FileOutputStream(bufferTempFile, true))) { - outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - - // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into - // the temp file, for as long as there's more than MAX_COLUMN_BUFFER - // bytes left in the token: - - while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) { - outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - tokenoffset += MAX_COLUMN_BUFFER; - } - - } - - // buffer the remaining bytes and reset the buffered - // byte counter: - - System.arraycopy(token.getBytes(), - tokenoffset, - bufferedColumns[varindex], - 0, - tokensize - tokenoffset); - - bufferedSizes[varindex] = tokensize - tokenoffset; - - } else { - // continue buffering - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize); - bufferedSizes[varindex] += tokensize; - } - } - } else { - throw new IOException("Tab file has fewer rows than the stored number of cases!"); - } - } - } - - // OK, we've created the individual byte vectors of the tab file columns; - // they may be partially saved in temp files and/or in memory. - // We now need to go through all these buffers and create the final - // rotated image file. - - try (BufferedOutputStream finalOut = new BufferedOutputStream( - new FileOutputStream(new File(rotatedImageFileName)))) { - - // but first we should create the offset header and write it out into - // the final file; because it should be at the head, doh! 
- - long columnOffset = varcount * 8; - // (this is the offset of the first column vector; it is equal to the - // size of the offset header, i.e. varcount * 8 bytes) - - for (int varindex = 0; varindex < varcount; varindex++) { - long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex]; - columnOffset += totalColumnBytes; - // totalColumnBytes; - byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array(); - System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8); - } - - finalOut.write(offsetHeader, 0, varcount * 8); - - for (int varindex = 0; varindex < varcount; varindex++) { - long cachedBytesRead = 0; - - // check if there is a cached temp file: - - File cachedTempFile = columnTempFiles[varindex]; - if (cachedTempFile != null) { - byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER]; - try (BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile))) { - int readlen = 0; - while ((readlen = cachedIn.read(cachedBytes)) > -1) { - finalOut.write(cachedBytes, 0, readlen); - cachedBytesRead += readlen; - } - } - - // delete the temp file: - cachedTempFile.delete(); - - } - - if (cachedBytesRead != cachedfileSizes[varindex]) { - throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+ - cachedfileSizes[varindex] + " bytes expected, "+cachedBytesRead+" read."); - } - - // then check if there are any bytes buffered for this column: - - if (bufferedSizes[varindex] > 0) { - finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]); - } - - } - } - - return new File(rotatedImageFileName); - - } - - /* - * Test method for taking a "rotated" image, and reversing it, reassembling - * all the columns in the original order. Which should result in a file - * byte-for-byte identical file to the original tab-delimited version. - * - * (do note that this method is not efficiently implemented; it's only - * being used for experiments so far, to confirm the accuracy of the - * accuracy of generateRotatedImage(). It should not be used for any - * practical means in the application!) 
- */ - private void reverseRotatedImage (File rotfile, int varcount, int casecount) throws IOException { - // open the file, read in the offset header: - try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile))) { - byte[] offsetHeader = new byte[varcount * 8]; - long[] byteOffsets = new long[varcount]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - //System.out.println(byteOffsets[varindex]); - } - - String [][] reversedMatrix = new String[casecount][varcount]; - - long offset = varcount * 8; - byte[] columnBytes; - - for (int varindex = 0; varindex < varcount; varindex++) { - long columnLength = byteOffsets[varindex] - offset; - - - - columnBytes = new byte[(int)columnLength]; - readlen = rotfileStream.read(columnBytes); - - if (readlen != columnLength) { - throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex); - } - /* - String columnString = new String(columnBytes); - //System.out.print(columnString); - String[] values = columnString.split("\n", -1); - - if (values.length < casecount) { - throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex); - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - reversedMatrix[caseindex][varindex] = values[caseindex]; - }*/ - - int bytecount = 0; - int byteoffset = 0; - int caseindex = 0; - //System.out.println("generating value vector for column "+varindex); - while (bytecount < columnLength) { - if (columnBytes[bytecount] == '\n') { - String token = new String(columnBytes, byteoffset, bytecount-byteoffset); - reversedMatrix[caseindex++][varindex] = token; - byteoffset = bytecount + 1; - } - bytecount++; - } - - if (caseindex != casecount) { - throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex); - } - offset = byteOffsets[varindex]; - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - for (int varindex = 0; varindex < varcount; varindex++) { - System.out.print(reversedMatrix[caseindex][varindex]); - if (varindex < varcount-1) { - System.out.print("\t"); - } else { - System.out.print("\n"); - } - } - } - - } - - - } - - /** - * main() method, for testing - * usage: java edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator testfile.tab varcount casecount column type - * make sure the CLASSPATH contains ... 
- * - */ - - public static void main(String[] args) { - - String tabFileName = args[0]; - int varcount = new Integer(args[1]).intValue(); - int casecount = new Integer(args[2]).intValue(); - int column = new Integer(args[3]).intValue(); - String type = args[4]; - - File tabFile = new File(tabFileName); - File rotatedImageFile = null; - - TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator(); - - /* - try { - rotatedImageFile = subsetGenerator.getRotatedImage(tabFile, varcount, casecount); - } catch (IOException ex) { - System.out.println(ex.getMessage()); - } - */ - - //System.out.println("\nFinished generating \"rotated\" column image file."); - - //System.out.println("\nOffsets:"); - - MathContext doubleMathContext = new MathContext(15, RoundingMode.HALF_EVEN); - String FORMAT_IEEE754 = "%+#.15e"; - - try { - //subsetGenerator.reverseRotatedImage(rotatedImageFile, varcount, casecount); - //String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); - if ("string".equals(type)) { - String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); - for (int i = 0; i < casecount; i++) { - System.out.println(columns[i]); - } - } else { - - Double[] columns = subsetGenerator.subsetDoubleVector(tabFile, column, varcount, casecount); - for (int i = 0; i < casecount; i++) { - if (columns[i] != null) { - BigDecimal outBigDecimal = new BigDecimal(columns[i], doubleMathContext); - System.out.println(String.format(FORMAT_IEEE754, outBigDecimal)); - } else { - System.out.println("NA"); - } - //System.out.println(columns[i]); - } - } - } catch (IOException ex) { - System.out.println(ex.getMessage()); - } - } -} - - + scanner.next(); + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java deleted file mode 100644 index 89e033353c1..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ - -package edu.harvard.iq.dataverse.dataaccess; - -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.datavariable.DataVariable; -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import java.util.logging.Logger; - -/** - * - * @author Leonid Andreev - */ -public class TabularSubsetInputStream extends InputStream { - private static final Logger logger = Logger.getLogger(TabularSubsetInputStream.class.getCanonicalName()); - - private TabularSubsetGenerator subsetGenerator = null; - private int numberOfSubsetVariables; - private int numberOfObservations; - private int numberOfObservationsRead = 0; - private byte[] leftoverBytes = null; - - public TabularSubsetInputStream(DataFile datafile, List variables) throws IOException { - if (datafile == null) { - throw new IOException("Null datafile in subset request"); - } - if (!datafile.isTabularData()) { - throw new IOException("Subset requested on a non-tabular data file"); - } - numberOfObservations = datafile.getDataTable().getCaseQuantity().intValue(); - - if (variables == null || variables.size() < 1) { - throw new IOException("Null or empty list of variables in subset request."); - } - numberOfSubsetVariables = variables.size(); - subsetGenerator = new TabularSubsetGenerator(datafile, variables); - - } - - //@Override - public int read() throws IOException { - throw new IOException("read() method not implemented; do not use."); - } - - //@Override - public int read(byte[] b) throws IOException { - // TODO: - // Move this code into TabularSubsetGenerator - logger.fine("subset input stream: read request, on a "+b.length+" byte buffer;"); - - if (numberOfSubsetVariables == 1) { - logger.fine("calling the single variable subset read method"); - return subsetGenerator.readSingleColumnSubset(b); - } - - int bytesread = 0; - byte [] linebuffer; - - // do we have a leftover? - if (leftoverBytes != null) { - if (leftoverBytes.length < b.length) { - System.arraycopy(leftoverBytes, 0, b, 0, leftoverBytes.length); - bytesread = leftoverBytes.length; - leftoverBytes = null; - - } else { - // shouldn't really happen... unless it's a very large subset, - // or a very long string, etc. - System.arraycopy(leftoverBytes, 0, b, 0, b.length); - byte[] tmp = new byte[leftoverBytes.length - b.length]; - System.arraycopy(leftoverBytes, b.length, tmp, 0, leftoverBytes.length - b.length); - leftoverBytes = tmp; - tmp = null; - return b.length; - } - } - - while (bytesread < b.length && numberOfObservationsRead < numberOfObservations) { - linebuffer = subsetGenerator.readSubsetLineBytes(); - numberOfObservationsRead++; - - if (bytesread + linebuffer.length < b.length) { - // copy linebuffer into the return buffer: - System.arraycopy(linebuffer, 0, b, bytesread, linebuffer.length); - bytesread += linebuffer.length; - } else { - System.arraycopy(linebuffer, 0, b, bytesread, b.length - bytesread); - // save the leftover; - if (bytesread + linebuffer.length > b.length) { - leftoverBytes = new byte[bytesread + linebuffer.length - b.length]; - System.arraycopy(linebuffer, b.length - bytesread, leftoverBytes, 0, bytesread + linebuffer.length - b.length); - } - return b.length; - } - } - - // and this means we've reached the end of the tab file! - - return bytesread > 0 ? 
bytesread : -1; - } - - //@Override - public void close() { - if (subsetGenerator != null) { - subsetGenerator.close(); - } - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index adbd132bce8..03a0044a987 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -9,6 +9,8 @@ import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; + +import static edu.harvard.iq.dataverse.api.ApiConstants.DS_VERSION_DRAFT; import static edu.harvard.iq.dataverse.dataaccess.DataAccess.getStorageIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; @@ -409,6 +411,69 @@ public static InputStream getThumbnailAsInputStream(Dataset dataset, int size) { return nonDefaultDatasetThumbnail; } } + + public static InputStream getLogoAsInputStream(Dataset dataset) { + if (dataset == null) { + return null; + } + StorageIO dataAccess = null; + + try { + dataAccess = DataAccess.getStorageIO(dataset); + } catch (IOException ioex) { + logger.warning("getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier() + + " (" + ioex.getMessage() + ")"); + } + + InputStream in = null; + try { + if (dataAccess == null) { + logger.warning( + "getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier()); + } else { + in = dataAccess.getAuxFileAsInputStream(datasetLogoFilenameFinal); + } + } catch (IOException ex) { + logger.fine( + "Dataset-level thumbnail file does not exist, or failed to open; will try to find an image file that can be used as the thumbnail."); + } + + if (in == null) { + DataFile thumbnailFile = dataset.getThumbnailFile(); + + if (thumbnailFile == null) { + if (dataset.isUseGenericThumbnail()) { + logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo and is 'Use Generic'."); + return null; + } else { + thumbnailFile = attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null); + if (thumbnailFile == null) { + logger.fine("Dataset (id :" + dataset.getId() + + ") does not have a logo available that could be selected automatically."); + return null; + } else { + + } + } + } + if (thumbnailFile.isRestricted()) { + logger.fine("Dataset (id :" + dataset.getId() + + ") has a logo the user selected but the file must have later been restricted. Returning null."); + return null; + } + + try { + in = ImageThumbConverter.getImageThumbnailAsInputStream(thumbnailFile.getStorageIO(), + ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE).getInputStream(); + } catch (IOException ioex) { + logger.warning("getLogo(): Failed to get logo from DataFile for " + dataset.getStorageIdentifier() + + " (" + ioex.getMessage() + ")"); + ioex.printStackTrace(); + } + + } + return in; + } /** * The dataset logo is the file that a user uploads which is *not* one of @@ -521,7 +586,7 @@ public static boolean validateDatasetMetadataExternally(Dataset ds, String execu // for the filter to whitelist by these attributes. 
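// A minimal usage sketch for the getLogoAsInputStream() helper added above; the caller
// and destination path are hypothetical, not part of this change. The contract to note
// is that null means "no logo and no usable, unrestricted thumbnail file".
// (IOException handling is omitted for brevity.)
InputStream logo = DatasetUtil.getLogoAsInputStream(dataset);
if (logo == null) {
    // fall back to a generic icon, or return a 404 from the API layer
} else {
    try (InputStream in = logo) {
        // hypothetical destination; a real caller would stream this to the HTTP response
        Files.copy(in, Paths.get("/tmp/dataset-logo.png"), StandardCopyOption.REPLACE_EXISTING);
    }
}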
try { - jsonMetadata = json(ds).add("datasetVersion", json(ds.getLatestVersion())) + jsonMetadata = json(ds).add("datasetVersion", json(ds.getLatestVersion(), true)) .add("sourceAddress", sourceAddressLabel) .add("userIdentifier", userIdentifier) .add("parentAlias", ds.getOwner().getAlias()) @@ -580,10 +645,10 @@ public static String getLicenseURI(DatasetVersion dsv) { // Return the URI // For standard licenses, just return the stored URI return (license != null) ? license.getUri().toString() - // For custom terms, construct a URI with :draft or the version number in the URI + // For custom terms, construct a URI with draft version constant or the version number in the URI : (dsv.getVersionState().name().equals("DRAFT") ? dsv.getDataverseSiteUrl() - + "/api/datasets/:persistentId/versions/:draft/customlicense?persistentId=" + + "/api/datasets/:persistentId/versions/" + DS_VERSION_DRAFT + "/customlicense?persistentId=" + dsv.getDataset().getGlobalId().asString() : dsv.getDataverseSiteUrl() + "/api/datasets/:persistentId/versions/" + dsv.getVersionNumber() + "." + dsv.getMinorVersionNumber() + "/customlicense?persistentId=" diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 98d5afc47e6..0143fced87c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -27,7 +27,6 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.json.JsonPrinter; @@ -61,7 +60,8 @@ import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import org.apache.commons.io.IOUtils; - +import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDataFilesCommand; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; /** @@ -1204,17 +1204,24 @@ private boolean step_030_createNewFilesViaIngest(){ clone = workingVersion.cloneDatasetVersion(); } try { - CreateDataFileResult result = FileUtil.createDataFiles(workingVersion, + /*CreateDataFileResult result = FileUtil.createDataFiles(workingVersion, this.newFileInputStream, this.newFileName, this.newFileContentType, this.newStorageIdentifier, this.newCheckSum, this.newCheckSumType, - this.systemConfig); - initialFileList = result.getDataFiles(); + this.systemConfig);*/ + + UploadSessionQuotaLimit quota = null; + if (systemConfig.isStorageQuotasEnforced()) { + quota = fileService.getUploadSessionQuotaLimit(dataset); + } + Command cmd = new CreateNewDataFilesCommand(dvRequest, workingVersion, newFileInputStream, newFileName, newFileContentType, newStorageIdentifier, quota, newCheckSum, newCheckSumType); + CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd); + initialFileList = createDataFilesResult.getDataFiles(); - } catch (IOException ex) { + } catch (CommandException ex) { if (ex.getMessage() != null && !ex.getMessage().isEmpty()) { this.addErrorSevere(getBundleErr("ingest_create_file_err") + " " + ex.getMessage()); } else { diff --git 
a/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java b/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java index 29e821c28a4..147c2c004db 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java @@ -71,6 +71,7 @@ public class VariableMetadata implements Serializable { /** * universe: metadata variable field. */ + @Column(columnDefinition="TEXT") private String universe; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/dataverse/DataverseUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataverse/DataverseUtil.java index 7964c56835e..f45a9058e7c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataverse/DataverseUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataverse/DataverseUtil.java @@ -104,7 +104,7 @@ public static void checkMetadataLangauge(Dataset ds, Dataverse owner, Map { + protected final DataFile dataFile; + protected final boolean includeDeaccessioned; + + public AbstractGetPublishedFileMetadataCommand(DataverseRequest request, DataFile dataFile, boolean includeDeaccessioned) { + super(request, dataFile); + this.dataFile = dataFile; + this.includeDeaccessioned = includeDeaccessioned; + } + + protected FileMetadata getLatestPublishedFileMetadata(CommandContext ctxt) { + return dataFile.getFileMetadatas().stream().filter(fileMetadata -> { + DatasetVersion.VersionState versionState = fileMetadata.getDatasetVersion().getVersionState(); + return (!versionState.equals(DatasetVersion.VersionState.DRAFT) + && isDatasetVersionAccessible(fileMetadata.getDatasetVersion(), dataFile.getOwner(), ctxt)); + }).reduce(null, DataFile::getTheNewerFileMetadata); + } + + protected boolean isDatasetVersionAccessible(DatasetVersion datasetVersion, Dataset ownerDataset, CommandContext ctxt) { + return datasetVersion.isReleased() || isDatasetVersionDeaccessionedAndAccessible(datasetVersion, ownerDataset, ctxt); + } + + private boolean isDatasetVersionDeaccessionedAndAccessible(DatasetVersion datasetVersion, Dataset ownerDataset, CommandContext ctxt) { + return includeDeaccessioned && datasetVersion.isDeaccessioned() && ctxt.permissions().requestOn(getRequest(), ownerDataset).has(Permission.EditDataset); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index b988fd05f03..29c27d0396d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.DOIDataCiteRegisterService; import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; @@ -14,6 +13,7 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DOIDataCiteRegisterService; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.bagit.BagGenerator; import edu.harvard.iq.dataverse.util.bagit.OREMap; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java index 5577d541012..e4edb973cd9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java @@ -19,6 +19,7 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import java.util.Collections; +import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -75,9 +76,19 @@ public RoleAssignment execute(CommandContext ctxt) throws CommandException { @Override public Map> getRequiredPermissions() { // for data file check permission on owning dataset - return Collections.singletonMap("", - defPoint instanceof Dataverse ? Collections.singleton(Permission.ManageDataversePermissions) - : defPoint instanceof Dataset ? Collections.singleton(Permission.ManageDatasetPermissions) : Collections.singleton(Permission.ManageFilePermissions)); + Set requiredPermissions = new HashSet(); + + if (defPoint instanceof Dataverse) { + requiredPermissions.add(Permission.ManageDataversePermissions); + } else if (defPoint instanceof Dataset) { + requiredPermissions.add(Permission.ManageDatasetPermissions); + } else { + requiredPermissions.add(Permission.ManageFilePermissions); + } + + requiredPermissions.addAll(role.permissions()); + + return Collections.singletonMap("", requiredPermissions); } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java new file mode 100644 index 00000000000..3a21345448b --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -0,0 +1,751 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; +import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; +import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +//import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; +import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; +import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; +import static edu.harvard.iq.dataverse.util.FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; +import static edu.harvard.iq.dataverse.util.FileUtil.createIngestFailureReport; +import static edu.harvard.iq.dataverse.util.FileUtil.determineFileType; +import static edu.harvard.iq.dataverse.util.FileUtil.determineFileTypeByNameAndExtension; +import static 
edu.harvard.iq.dataverse.util.FileUtil.getFilesTempDirectory; +import static edu.harvard.iq.dataverse.util.FileUtil.saveInputStreamInTempFile; +import static edu.harvard.iq.dataverse.util.FileUtil.useRecognizedType; +import edu.harvard.iq.dataverse.util.ShapefileHandler; +import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.file.BagItFileHandler; +import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory; +import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.logging.Logger; +import java.util.zip.GZIPInputStream; +import java.util.zip.ZipFile; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import jakarta.enterprise.inject.spi.CDI; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; + +/** + * + * @author landreev + */ +// Note the commented out @RequiredPermissions. We need to use dynamic +// permissions instead, to accommodate both adding files to an existing +// dataset and files being uploaded in the context of creating a new dataset +// via the Add Dataset page. +//@RequiredPermissions( Permission.EditDataset ) +public class CreateNewDataFilesCommand extends AbstractCommand { + private static final Logger logger = Logger.getLogger(CreateNewDataFilesCommand.class.getCanonicalName()); + + private final DatasetVersion version; + private final InputStream inputStream; + private final String fileName; + private final String suppliedContentType; + private final UploadSessionQuotaLimit quota; + // parent Dataverse must be specified when the command is called on Create + // of a new dataset that does not exist in the database yet (for the purposes + // of authorization - see getRequiredPermissions() below): + private final Dataverse parentDataverse; + // With Direct Upload the following values already exist and are passed to the command: + private final String newStorageIdentifier; + private final String newCheckSum; + private DataFile.ChecksumType newCheckSumType; + private final Long newFileSize; + + public CreateNewDataFilesCommand(DataverseRequest aRequest, DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, String newStorageIdentifier, UploadSessionQuotaLimit quota, String newCheckSum) { + this(aRequest, version, inputStream, fileName, suppliedContentType, newStorageIdentifier, quota, newCheckSum, null); + } + + public CreateNewDataFilesCommand(DataverseRequest aRequest, DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, String newStorageIdentifier, UploadSessionQuotaLimit quota, String newCheckSum, DataFile.ChecksumType newCheckSumType) { + this(aRequest, version, inputStream, fileName, suppliedContentType, newStorageIdentifier, quota, newCheckSum, newCheckSumType, null, null); + } + + // This version of the command must be used when files are created in the + // context of creating a brand new dataset (from the Add Dataset page): + + public 
CreateNewDataFilesCommand(DataverseRequest aRequest, DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, String newStorageIdentifier, UploadSessionQuotaLimit quota, String newCheckSum, DataFile.ChecksumType newCheckSumType, Long newFileSize, Dataverse dataverse) { + super(aRequest, dataverse); + + this.version = version; + this.inputStream = inputStream; + this.fileName = fileName; + this.suppliedContentType = suppliedContentType; + this.newStorageIdentifier = newStorageIdentifier; + this.newCheckSum = newCheckSum; + this.newCheckSumType = newCheckSumType; + this.parentDataverse = dataverse; + this.quota = quota; + this.newFileSize = newFileSize; + } + + + @Override + public CreateDataFileResult execute(CommandContext ctxt) throws CommandException { + List datafiles = new ArrayList<>(); + + //When there is no checksum/checksumtype being sent (normal upload, needs to be calculated), set the type to the current default + if(newCheckSumType == null) { + newCheckSumType = ctxt.systemConfig().getFileFixityChecksumAlgorithm(); + } + + String warningMessage = null; + + // save the file, in the temporary location for now: + Path tempFile = null; + + Long fileSizeLimit = ctxt.systemConfig().getMaxFileUploadSizeForStore(version.getDataset().getEffectiveStorageDriverId()); + Long storageQuotaLimit = null; + + if (ctxt.systemConfig().isStorageQuotasEnforced()) { + if (quota != null) { + storageQuotaLimit = quota.getRemainingQuotaInBytes(); + } + } + String finalType = null; + File newFile = null; // this File will be used for a single-file, local (non-direct) upload + long fileSize = -1; + + + if (newStorageIdentifier == null) { + if (getFilesTempDirectory() != null) { + try { + tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload"); + // "temporary" location is the key here; this is why we are not using + // the DataStore framework for this - the assumption is that + // temp files will always be stored on the local filesystem. + // -- L.A. Jul. 2014 + logger.fine("Will attempt to save the file as: " + tempFile.toString()); + Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException ioex) { + throw new CommandExecutionException("Failed to save the upload as a temp file (temp disk space?)", ioex, this); + } + + // A file size check, before we do anything else: + // (note that "no size limit set" = "unlimited") + // (also note, that if this is a zip file, we'll be checking + // the size limit for each of the individual unpacked files) + fileSize = tempFile.toFile().length(); + if (fileSizeLimit != null && fileSize > fileSizeLimit) { + try { + tempFile.toFile().delete(); + } catch (Exception ex) { + // ignore - but log a warning + logger.warning("Could not remove temp file " + tempFile.getFileName()); + } + throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit)), this); + } + + } else { + throw new CommandExecutionException("Temp directory is not configured.", this); + } + + logger.fine("mime type supplied: " + suppliedContentType); + + // Let's try our own utilities (Jhove, etc.) to determine the file type + // of the uploaded file. (We may already have a mime type supplied for this + // file - maybe the type that the browser recognized on upload; or, if + // it's a harvest, maybe the remote server has already given us the type + // for this file... 
with our own type utility we may or may not do better + // than the type supplied: + // -- L.A. + String recognizedType = null; + + try { + recognizedType = determineFileType(tempFile.toFile(), fileName); + logger.fine("File utility recognized the file as " + recognizedType); + if (recognizedType != null && !recognizedType.equals("")) { + if (useRecognizedType(suppliedContentType, recognizedType)) { + finalType = recognizedType; + } + } + + } catch (Exception ex) { + logger.warning("Failed to run the file utility mime type check on file " + fileName); + } + + if (finalType == null) { + finalType = (suppliedContentType == null || suppliedContentType.equals("")) + ? MIME_TYPE_UNDETERMINED_DEFAULT + : suppliedContentType; + } + + // A few special cases: + // if this is a gzipped FITS file, we'll uncompress it, and ingest it as + // a regular FITS file: + if (finalType.equals("application/fits-gzipped")) { + + InputStream uncompressedIn = null; + String finalFileName = fileName; + // if the file name had the ".gz" extension, remove it, + // since we are going to uncompress it: + if (fileName != null && fileName.matches(".*\\.gz$")) { + finalFileName = fileName.replaceAll("\\.gz$", ""); + } + + DataFile datafile = null; + long uncompressedFileSize = -1; + try { + uncompressedIn = new GZIPInputStream(new FileInputStream(tempFile.toFile())); + File unZippedTempFile = saveInputStreamInTempFile(uncompressedIn, fileSizeLimit, storageQuotaLimit); + uncompressedFileSize = unZippedTempFile.length(); + datafile = FileUtil.createSingleDataFile(version, unZippedTempFile, finalFileName, MIME_TYPE_UNDETERMINED_DEFAULT, ctxt.systemConfig().getFileFixityChecksumAlgorithm()); + } catch (IOException | FileExceedsMaxSizeException | FileExceedsStorageQuotaException ioex) { + // it looks like we simply skip the file silently, if its uncompressed size + // exceeds the limit. we should probably report this in detail instead. + datafile = null; + } finally { + if (uncompressedIn != null) { + try { + uncompressedIn.close(); + } catch (IOException e) { + } + } + } + + // If we were able to produce an uncompressed file, we'll use it + // to create and return a final DataFile; if not, we're not going + // to do anything - and then a new DataFile will be created further + // down, from the original, uncompressed file. + if (datafile != null) { + // remove the compressed temp file: + try { + tempFile.toFile().delete(); + } catch (SecurityException ex) { + // (this is very non-fatal) + logger.warning("Failed to delete temporary file " + tempFile.toString()); + } + + datafiles.add(datafile); + // Update quota if present + if (quota != null) { + quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + uncompressedFileSize); + } + return CreateDataFileResult.success(fileName, finalType, datafiles); + } + + // If it's a ZIP file, we are going to unpack it and create multiple + // DataFile objects from its contents: + } else if (finalType.equals("application/zip")) { + + ZipFile zipFile = null; + ZipInputStream unZippedIn = null; + ZipEntry zipEntry = null; + + int fileNumberLimit = ctxt.systemConfig().getZipUploadFilesLimit(); + Long combinedUnzippedFileSize = 0L; + + try { + Charset charset = null; + /* + TODO: (?) + We may want to investigate somehow letting the user specify + the charset for the filenames in the zip file... + - otherwise, ZipInputStream bails out if it encounteres a file + name that's not valid in the current charest (i.e., UTF-8, in + our case). 
It would be a bit trickier than what we're doing for + SPSS tabular ingests - with the lang. encoding pulldown menu - + because this encoding needs to be specified *before* we upload and + attempt to unzip the file. + -- L.A. 4.0 beta12 + logger.info("default charset is "+Charset.defaultCharset().name()); + if (Charset.isSupported("US-ASCII")) { + logger.info("charset US-ASCII is supported."); + charset = Charset.forName("US-ASCII"); + if (charset != null) { + logger.info("was able to obtain charset for US-ASCII"); + } + + } + */ + + /** + * Perform a quick check for how many individual files are + * inside this zip archive. If it's above the limit, we can + * give up right away, without doing any unpacking. + * This should be a fairly inexpensive operation, we just need + * to read the directory at the end of the file. + */ + + if (charset != null) { + zipFile = new ZipFile(tempFile.toFile(), charset); + } else { + zipFile = new ZipFile(tempFile.toFile()); + } + /** + * The ZipFile constructors above will throw ZipException - + * a type of IOException - if there's something wrong + * with this file as a zip. There's no need to intercept it + * here, it will be caught further below, with other IOExceptions, + * at which point we'll give up on trying to unpack it and + * then attempt to save it as is. + */ + + int numberOfUnpackableFiles = 0; + + /** + * Note that we can't just use zipFile.size(), + * unfortunately, since that's the total number of entries, + * some of which can be directories. So we need to go + * through all the individual zipEntries and count the ones + * that are files. + */ + + for (Enumeration entries = zipFile.entries(); entries.hasMoreElements();) { + ZipEntry entry = entries.nextElement(); + logger.fine("inside first zip pass; this entry: "+entry.getName()); + if (!entry.isDirectory()) { + String shortName = entry.getName().replaceFirst("^.*[\\/]", ""); + // ... 
and, finally, check if it's a "fake" file - a zip archive entry + // created for a MacOS X filesystem element: (these + // start with "._") + if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) { + numberOfUnpackableFiles++; + if (numberOfUnpackableFiles > fileNumberLimit) { + logger.warning("Zip upload - too many files in the zip to process individually."); + warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + + "); please upload a zip archive with fewer files, if you want them to be ingested " + + "as individual DataFiles."; + throw new IOException(); + } + // In addition to counting the files, we can + // also check the file size while we're here, + // provided the size limit is defined; if a single + // file is above the individual size limit, unzipped, + // we give up on unpacking this zip archive as well: + if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) { + throw new FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), bytesToHumanReadable(fileSizeLimit))); + } + // Similarly, we want to check if saving all these unpacked + // files is going to push the disk usage over the + // quota: + if (storageQuotaLimit != null) { + combinedUnzippedFileSize = combinedUnzippedFileSize + entry.getSize(); + if (combinedUnzippedFileSize > storageQuotaLimit) { + //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(combinedUnzippedFileSize), bytesToHumanReadable(storageQuotaLimit))); + // change of plans: if the unzipped content inside exceeds the remaining quota, + // we reject the upload outright, rather than accepting the zip + // file as is. + throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this); + } + } + } + } + } + + // OK we're still here - that means we can proceed unzipping. + + // Close the ZipFile, re-open as ZipInputStream: + zipFile.close(); + // reset: + combinedUnzippedFileSize = 0L; + + if (charset != null) { + unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()), charset); + } else { + unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile())); + } + + while (true) { + try { + zipEntry = unZippedIn.getNextEntry(); + } catch (IllegalArgumentException iaex) { + // Note: + // ZipInputStream documentation doesn't even mention that + // getNextEntry() throws an IllegalArgumentException! + // but that's what happens if the file name of the next + // entry is not valid in the current CharSet. + // -- L.A. + warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) 
Saving the file as is."; + logger.warning(warningMessage); + throw new IOException(); + } + + if (zipEntry == null) { + break; + } + // Note that some zip entries may be directories - we + // simply skip them: + + if (!zipEntry.isDirectory()) { + if (datafiles.size() > fileNumberLimit) { + logger.warning("Zip upload - too many files."); + warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + + "); please upload a zip archive with fewer files, if you want them to be ingested " + + "as individual DataFiles."; + throw new IOException(); + } + + String fileEntryName = zipEntry.getName(); + logger.fine("ZipEntry, file: " + fileEntryName); + + if (fileEntryName != null && !fileEntryName.equals("")) { + + String shortName = fileEntryName.replaceFirst("^.*[\\/]", ""); + + // Check if it's a "fake" file - a zip archive entry + // created for a MacOS X filesystem element: (these + // start with "._") + if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) { + // OK, this seems like an OK file entry - we'll try + // to read it and create a DataFile with it: + + String storageIdentifier = FileUtil.generateStorageIdentifier(); + File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier); + Files.copy(unZippedIn, unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + // No need to check the size of this unpacked file against the size limit, + // since we've already checked for that in the first pass. + + DataFile datafile = FileUtil.createSingleDataFile(version, null, storageIdentifier, shortName, + MIME_TYPE_UNDETERMINED_DEFAULT, + ctxt.systemConfig().getFileFixityChecksumAlgorithm(), null, false); + + if (!fileEntryName.equals(shortName)) { + // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes), + // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all + // the leading, trailing and duplicate slashes; then replace all the characters that + // don't pass our validation rules. + String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", ""); + directoryName = StringUtil.sanitizeFileDirectory(directoryName, true); + // if (!"".equals(directoryName)) { + if (!StringUtil.isEmpty(directoryName)) { + logger.fine("setting the directory label to " + directoryName); + datafile.getFileMetadata().setDirectoryLabel(directoryName); + } + } + + if (datafile != null) { + // We have created this datafile with the mime type "unknown"; + // Now that we have it saved in a temporary location, + // let's try and determine its real type: + + String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(); + + try { + recognizedType = determineFileType(unzippedFile, shortName); + // null the File explicitly, to release any open FDs: + unzippedFile = null; + logger.fine("File utility recognized unzipped file as " + recognizedType); + if (recognizedType != null && !recognizedType.equals("")) { + datafile.setContentType(recognizedType); + } + } catch (Exception ex) { + logger.warning("Failed to run the file utility mime type check on file " + fileName); + } + + datafiles.add(datafile); + combinedUnzippedFileSize += datafile.getFilesize(); + } + } + } + } + unZippedIn.closeEntry(); + + } + + } catch (IOException ioex) { + // just clear the datafiles list and let + // ingest default to creating a single DataFile out + // of the unzipped file. 
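// A compact sketch of the first-pass entry count performed above, assuming a hypothetical
// limit: directories and MacOS resource-fork entries ("._*", ".DS_Store") are not counted,
// so only genuinely unpackable files are weighed against the configured zip-file limit.
int unpackable = 0;
try (ZipFile zf = new ZipFile(tempFile.toFile())) {
    for (Enumeration<? extends ZipEntry> en = zf.entries(); en.hasMoreElements();) {
        ZipEntry entry = en.nextElement();
        String shortName = entry.getName().replaceFirst("^.*[\\/]", "");
        if (!entry.isDirectory() && !shortName.startsWith("._")
                && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) {
            unpackable++;
        }
    }
}
// if unpackable exceeds the limit, the archive is kept as a single file instead of being unpacked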
+ logger.warning("Unzipping failed; rolling back to saving the file as is."); + if (warningMessage == null) { + warningMessage = BundleUtil.getStringFromBundle("file.addreplace.warning.unzip.failed"); + } + + datafiles.clear(); + } catch (FileExceedsMaxSizeException femsx) { + logger.warning("One of the unzipped files exceeds the size limit; resorting to saving the file as is. " + femsx.getMessage()); + warningMessage = BundleUtil.getStringFromBundle("file.addreplace.warning.unzip.failed.size", Arrays.asList(FileSizeChecker.bytesToHumanReadable(fileSizeLimit))); + datafiles.clear(); + } /*catch (FileExceedsStorageQuotaException fesqx) { + //logger.warning("One of the unzipped files exceeds the storage quota limit; resorting to saving the file as is. " + fesqx.getMessage()); + //warningMessage = BundleUtil.getStringFromBundle("file.addreplace.warning.unzip.failed.quota", Arrays.asList(FileSizeChecker.bytesToHumanReadable(storageQuotaLimit))); + //datafiles.clear(); + throw new CommandExecutionException(fesqx.getMessage(), fesqx, this); + }*/ finally { + if (zipFile != null) { + try { + zipFile.close(); + } catch (Exception zEx) {} + } + if (unZippedIn != null) { + try { + unZippedIn.close(); + } catch (Exception zEx) {} + } + } + if (!datafiles.isEmpty()) { + // remove the uploaded zip file: + try { + Files.delete(tempFile); + } catch (IOException ioex) { + // do nothing - it's just a temp file. + logger.warning("Could not remove temp file " + tempFile.getFileName().toString()); + } + // update the quota object: + if (quota != null) { + quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + combinedUnzippedFileSize); + } + // and return: + return CreateDataFileResult.success(fileName, finalType, datafiles); + } + + } else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) { + // Shape files may have to be split into multiple files, + // one zip archive per each complete set of shape files: + + // File rezipFolder = new File(this.getFilesTempDirectory()); + File rezipFolder = FileUtil.getShapefileUnzipTempDirectory(); + + IngestServiceShapefileHelper shpIngestHelper; + shpIngestHelper = new IngestServiceShapefileHelper(tempFile.toFile(), rezipFolder); + + boolean didProcessWork = shpIngestHelper.processFile(); + if (!(didProcessWork)) { + logger.severe("Processing of zipped shapefile failed."); + return CreateDataFileResult.error(fileName, finalType); + } + long combinedRezippedFileSize = 0L; + + try { + + for (File finalFile : shpIngestHelper.getFinalRezippedFiles()) { + FileInputStream finalFileInputStream = new FileInputStream(finalFile); + finalType = FileUtil.determineContentType(finalFile); + if (finalType == null) { + logger.warning("Content type is null; but should default to 'MIME_TYPE_UNDETERMINED_DEFAULT'"); + continue; + } + + File unZippedShapeTempFile = saveInputStreamInTempFile(finalFileInputStream, fileSizeLimit, storageQuotaLimit != null ? 
storageQuotaLimit - combinedRezippedFileSize : null); + DataFile new_datafile = FileUtil.createSingleDataFile(version, unZippedShapeTempFile, finalFile.getName(), finalType, ctxt.systemConfig().getFileFixityChecksumAlgorithm()); + + String directoryName = null; + String absolutePathName = finalFile.getParent(); + if (absolutePathName != null) { + if (absolutePathName.length() > rezipFolder.toString().length()) { + // This file lives in a subfolder - we want to + // preserve it in the FileMetadata: + directoryName = absolutePathName.substring(rezipFolder.toString().length() + 1); + + if (!StringUtil.isEmpty(directoryName)) { + new_datafile.getFileMetadata().setDirectoryLabel(directoryName); + } + } + } + if (new_datafile != null) { + datafiles.add(new_datafile); + combinedRezippedFileSize += unZippedShapeTempFile.length(); + // todo: can this new_datafile be null? + } else { + logger.severe("Could not add part of rezipped shapefile. new_datafile was null: " + finalFile.getName()); + } + try { + finalFileInputStream.close(); + } catch (IOException ioex) { + // this one can be ignored + } + } + } catch (FileExceedsMaxSizeException | FileExceedsStorageQuotaException femsx) { + logger.severe("One of the unzipped shape files exceeded the size limit, or the storage quota; giving up. " + femsx.getMessage()); + datafiles.clear(); + // (or should we throw an exception, instead of skipping it quietly? + } catch (IOException ioex) { + throw new CommandExecutionException("Failed to process one of the components of the unpacked shape file", ioex, this); + // todo? - maybe try to provide a more detailed explanation, of which repackaged component, etc.? + } + + // Delete the temp directory used for unzipping + // The try-catch is due to error encountered in using NFS for stocking file, + // cf. https://github.com/IQSS/dataverse/issues/5909 + try { + FileUtils.deleteDirectory(rezipFolder); + } catch (IOException ioex) { + // do nothing - it's a temp folder. 
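// A small sketch, with hypothetical paths, of how the directory label is derived for each
// rezipped shapefile component above: whatever lies below the rezip folder becomes the
// FileMetadata directory label.
File rezip = new File("/tmp/shp-rezip");                          // hypothetical temp folder
File component = new File("/tmp/shp-rezip/coastlines/set1.zip");  // hypothetical rezipped part
String parentPath = component.getParent();                        // "/tmp/shp-rezip/coastlines"
String label = (parentPath != null && parentPath.length() > rezip.toString().length())
        ? parentPath.substring(rezip.toString().length() + 1)     // -> "coastlines"
        : null;                                                    // file sits directly in the rezip folder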
+ logger.warning("Could not remove temp folder, error message : " + ioex.getMessage()); + } + + if (!datafiles.isEmpty()) { + // remove the uploaded zip file: + try { + Files.delete(tempFile); + } catch (IOException ioex) { + // ignore - it's just a temp file - but let's log a warning + logger.warning("Could not remove temp file " + tempFile.getFileName().toString()); + } catch (SecurityException se) { + // same + logger.warning("Unable to delete: " + tempFile.toString() + "due to Security Exception: " + + se.getMessage()); + } + // update the quota object: + if (quota != null) { + quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + combinedRezippedFileSize); + } + return CreateDataFileResult.success(fileName, finalType, datafiles); + } else { + logger.severe("No files added from directory of rezipped shapefiles"); + } + return CreateDataFileResult.error(fileName, finalType); + + } else if (finalType.equalsIgnoreCase(BagItFileHandler.FILE_TYPE)) { + + try { + Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); + if (bagItFileHandler.isPresent()) { + CreateDataFileResult result = bagItFileHandler.get().handleBagItPackage(ctxt.systemConfig(), version, fileName, tempFile.toFile()); + return result; + } + } catch (IOException ioex) { + throw new CommandExecutionException("Failed to process uploaded BagIt file", ioex, this); + } + } + + // These are the final File and its size that will be used to + // add create a single Datafile: + + newFile = tempFile.toFile(); + fileSize = newFile.length(); + + } else { + // Direct upload. + + // Since this is a direct upload, and therefore no temp file associated + // with it, we may, OR MAY NOT know the size of the file. If this is + // a direct upload via the UI, the page must have already looked up + // the size, after the client confirmed that the upload had completed. + // (so that we can reject the upload here, i.e. before the user clicks + // save, if it's over the size limit or storage quota). However, if + // this is a direct upload via the API, we will wait until the + // upload is finalized in the saveAndAddFiles method to enforce the + // limits. + if (newFileSize != null) { + fileSize = newFileSize; + + // if the size is specified, and it's above the individual size + // limit for this store, we can reject it now: + if (fileSizeLimit != null && fileSize > fileSizeLimit) { + throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit)), this); + } + } + + // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied + finalType = StringUtils.isBlank(suppliedContentType) ? 
FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; + String type = determineFileTypeByNameAndExtension(fileName); + if (!StringUtils.isBlank(type)) { + //Use rules for deciding when to trust browser supplied type + if (useRecognizedType(finalType, type)) { + finalType = type; + } + logger.fine("Supplied type: " + suppliedContentType + ", finalType: " + finalType); + } + + + } + + // Finally, if none of the special cases above were applicable (or + // if we were unable to unpack an uploaded file, etc.), we'll just + // create and return a single DataFile: + + + // We have already checked that this file does not exceed the individual size limit; + // but if we are processing it as is, as a single file, we need to check if + // its size does not go beyond the allocated storage quota (if specified): + + if (storageQuotaLimit != null && fileSize > storageQuotaLimit) { + if (newFile != null) { + // Remove the temp. file, if this is a non-direct upload. + // If this is a direct upload, it will be a responsibility of the + // component calling the command to remove the file that may have + // already been saved in the S3 volume. + try { + newFile.delete(); + } catch (Exception ex) { + // ignore - but log a warning + logger.warning("Could not remove temp file " + tempFile.getFileName()); + } + } + throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(fileSize), bytesToHumanReadable(storageQuotaLimit)), this); + } + + DataFile datafile = FileUtil.createSingleDataFile(version, newFile, newStorageIdentifier, fileName, finalType, newCheckSumType, newCheckSum); + + if (datafile != null) { + + if (warningMessage != null) { + createIngestFailureReport(datafile, warningMessage); + datafile.SetIngestProblem(); + } + if (datafile.getFilesize() < 0) { + datafile.setFilesize(fileSize); + } + datafiles.add(datafile); + + // Update the quota definition for the *current upload session* + // This is relevant for the uploads going through the UI page + // (where there may be an appreciable amount of time between the user + // uploading the files and clicking "save". The file size should be + // available here for both direct and local uploads via the UI. + // It is not yet available if this is direct-via-API - but + // for API uploads the quota check will be enforced during the final + // save. 
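// A short numeric recap (made-up sizes) of the session quota bookkeeping described above:
// a file is rejected when it exceeds what is left of the allocation, and an accepted size
// is added to the running total just below so later uploads in the same session see it.
//   allocation 100 MB, already used 98 MB  ->  remaining 2 MB
//   incoming 5 MB file                      ->  rejected (file.addreplace.error.quota_exceeded)
//   incoming 1 MB file                      ->  accepted, total usage becomes 99 MB
long remainingExample = 2L * 1024 * 1024;   // hypothetical getRemainingQuotaInBytes()
long incomingExample = 1L * 1024 * 1024;    // hypothetical size of the file being added
boolean withinQuota = incomingExample <= remainingExample;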
+ if (fileSize > 0 && quota != null) { + logger.info("Setting total usage in bytes to " + (quota.getTotalUsageInBytes() + fileSize)); + quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + fileSize); + } + + return CreateDataFileResult.success(fileName, finalType, datafiles); + } + + return CreateDataFileResult.error(fileName, finalType); + } // end createDataFiles + + @Override + public Map> getRequiredPermissions() { + Map> ret = new HashMap<>(); + + ret.put("", new HashSet<>()); + + if (parentDataverse != null) { + // The command is called in the context of uploading files on + // create of a new dataset + ret.get("").add(Permission.AddDataset); + } else { + // An existing dataset + ret.get("").add(Permission.EditDataset); + } + + return ret; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java index c9ebe735e31..c22a2cdb4a2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.RoleAssignment; import edu.harvard.iq.dataverse.Template; import edu.harvard.iq.dataverse.UserNotification; @@ -12,12 +13,13 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import static edu.harvard.iq.dataverse.util.StringUtil.nonEmpty; import java.util.logging.Logger; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import java.util.List; import java.sql.Timestamp; @@ -71,13 +73,18 @@ public CreateNewDatasetCommand(Dataset theDataset, DataverseRequest aRequest, Te */ @Override protected void additionalParameterTests(CommandContext ctxt) throws CommandException { - if ( nonEmpty(getDataset().getIdentifier()) ) { - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(getDataset().getProtocol(), ctxt); - if ( !idServiceBean.isGlobalIdUnique(getDataset().getGlobalId()) ) { - throw new IllegalCommandException(String.format("Dataset with identifier '%s', protocol '%s' and authority '%s' already exists", - getDataset().getIdentifier(), getDataset().getProtocol(), getDataset().getAuthority()), - this); - } + if (nonEmpty(getDataset().getIdentifier())) { + GlobalId pid = getDataset().getGlobalId(); + if (pid != null) { + PidProvider pidProvider = PidUtil.getPidProvider(pid.getProviderId()); + + if (!pidProvider.isGlobalIdUnique(pid)) { + throw new IllegalCommandException(String.format( + "Dataset with identifier '%s', protocol '%s' and authority '%s' already exists", + getDataset().getIdentifier(), getDataset().getProtocol(), getDataset().getAuthority()), + this); + } + } } } @@ -88,11 +95,11 @@ protected DatasetVersion getVersionToPersist( Dataset theDataset ) { @Override protected void handlePid(Dataset theDataset, CommandContext ctxt) throws CommandException { - GlobalIdServiceBean 
idServiceBean = GlobalIdServiceBean.getBean(ctxt); - if(!idServiceBean.isConfigured()) { - throw new IllegalCommandException("PID Provider " + idServiceBean.getProviderInformation().get(0) + " is not configured.", this); + PidProvider pidProvider = PidUtil.getPidProvider(theDataset.getGlobalId().getProviderId()); + if(!pidProvider.canManagePID()) { + throw new IllegalCommandException("PID Provider " + pidProvider.getId() + " is not configured.", this); } - if ( !idServiceBean.registerWhenPublished() ) { + if ( !pidProvider.registerWhenPublished() ) { // pre-register a persistent id registerExternalIdentifier(theDataset, ctxt, true); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index b8adc8e23e8..f83041d87bd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -91,7 +91,9 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // we have to merge to update the database but not flush because // we don't want to create two draft versions! Dataset tempDataset = ctxt.em().merge(getDataset()); - + + updateVersion = tempDataset.getLatestVersionForCopy(); + // Look for file metadata changes and update published metadata if needed List pubFmds = updateVersion.getFileMetadatas(); int pubFileCount = pubFmds.size(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java new file mode 100644 index 00000000000..c0f863686da --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java @@ -0,0 +1,53 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.util.BundleUtil; +import java.util.logging.Logger; + +/** + * + * @author landreev + * + * A superuser-only command: + */ +@RequiredPermissions({}) +public class DeleteCollectionQuotaCommand extends AbstractVoidCommand { + + private static final Logger logger = Logger.getLogger(DeleteCollectionQuotaCommand.class.getCanonicalName()); + + private final Dataverse targetDataverse; + + public DeleteCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + targetDataverse = target; + } + + @Override + public void executeImpl(CommandContext ctxt) throws CommandException { + // first check if user is a superuser + if ( (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser() ) ) { + throw new 
PermissionException(BundleUtil.getStringFromBundle("dataverse.storage.quota.superusersonly"), + this, null, targetDataverse); + } + + if (targetDataverse == null) { + throw new IllegalCommandException("", this); + } + + StorageQuota storageQuota = targetDataverse.getStorageQuota(); + + if (storageQuota != null && storageQuota.getAllocation() != null) { + ctxt.dataverses().disableStorageQuota(storageQuota); + } + // ... and if no quota was enabled on the collection - nothing to do = success + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java index 83d0f877d61..0812c52a846 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -11,6 +12,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import java.io.IOException; @@ -23,7 +26,6 @@ import java.util.Collections; import java.util.logging.Level; import java.util.logging.Logger; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; /** * Deletes a data file, both DB entity and filesystem object. @@ -202,15 +204,18 @@ public FileVisitResult postVisitDirectory(final Path dir, final IOException e) */ } } - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); - try { - if (idServiceBean.alreadyRegistered(doomed)) { - idServiceBean.deleteIdentifier(doomed); + GlobalId pid = doomed.getGlobalId(); + if (pid != null) { + PidProvider pidProvider = PidUtil.getPidProvider(pid.getProviderId()); + + try { + if (pidProvider.alreadyRegistered(doomed)) { + pidProvider.deleteIdentifier(doomed); + } + } catch (Exception e) { + logger.log(Level.WARNING, "Identifier deletion was not successfull:", e.getMessage()); } - } catch (Exception e) { - logger.log(Level.WARNING, "Identifier deletion was not successfull:", e.getMessage()); } - DataFile doomedAndMerged = ctxt.em().merge(doomed); ctxt.em().remove(doomedAndMerged); /** @@ -235,6 +240,20 @@ public String describe() { @Override public boolean onSuccess(CommandContext ctxt, Object r) { + // Adjust the storage use for the parent containers: + if (!doomed.isHarvested()) { + long storedSize = doomed.getFilesize(); + // ingested tabular data files also have saved originals that + // are counted as "storage use" + Long savedOriginalSize = doomed.getOriginalFileSize(); + if (savedOriginalSize != null) { + // Note that DataFile.getFilesize() can return -1 (for "unknown"): + storedSize = storedSize > 0 ? 
storedSize + savedOriginalSize : savedOriginalSize; + } + if (storedSize > 0) { + ctxt.storageUse().incrementStorageSizeRecursively(doomed.getOwner().getId(), (0L - storedSize)); + } + } /** * We *could* re-index the entire dataset but it's more efficient to * target individual files for deletion, which should always be drafts. diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeletePidCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeletePidCommand.java index 274aeb3c3fd..c4910dd10c2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeletePidCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeletePidCommand.java @@ -1,7 +1,6 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; @@ -11,7 +10,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import org.apache.commons.httpclient.HttpException; @@ -38,25 +38,26 @@ public DeletePidCommand(DataverseRequest request, Dataset dataset) { protected void executeImpl(CommandContext ctxt) throws CommandException { if (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser()) { - throw new PermissionException(BundleUtil.getStringFromBundle("admin.api.auth.mustBeSuperUser"), - this, Collections.singleton(Permission.EditDataset), dataset); + throw new PermissionException(BundleUtil.getStringFromBundle("admin.api.auth.mustBeSuperUser"), this, + Collections.singleton(Permission.EditDataset), dataset); } - String nonNullDefaultIfKeyNotFound = ""; - String protocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(protocol, ctxt); + PidProvider pidProvider = PidUtil.getPidProvider(dataset.getGlobalId().getProviderId()); + try { - idServiceBean.deleteIdentifier(dataset); + pidProvider.deleteIdentifier(dataset); // Success! Clear the create time, etc. 
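// A compact sketch of the provider lookup now shared by the delete commands in this change
// (replacing the old GlobalIdServiceBean.getBean(...) calls): the provider is resolved from
// the PID itself. Whether a failed delete is merely logged or re-thrown differs per command.
GlobalId pid = dataset.getGlobalId();
if (pid != null) {
    PidProvider provider = PidUtil.getPidProvider(pid.getProviderId());
    try {
        if (provider.alreadyRegistered(dataset)) {
            provider.deleteIdentifier(dataset);
        }
    } catch (Exception e) {
        // DeleteDataFileCommand and DestroyDatasetCommand log a warning here;
        // DeletePidCommand surfaces the failure as an IllegalCommandException instead
    }
}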
dataset.setGlobalIdCreateTime(null); dataset.setIdentifierRegistered(false); ctxt.datasets().merge(dataset); } catch (HttpException hex) { - String message = BundleUtil.getStringFromBundle("pids.deletePid.failureExpected", Arrays.asList(dataset.getGlobalId().asString(), Integer.toString(hex.getReasonCode()))); + String message = BundleUtil.getStringFromBundle("pids.deletePid.failureExpected", + Arrays.asList(dataset.getGlobalId().asString(), Integer.toString(hex.getReasonCode()))); logger.info(message); throw new IllegalCommandException(message, this); } catch (Exception ex) { - String message = BundleUtil.getStringFromBundle("pids.deletePid.failureOther", Arrays.asList(dataset.getGlobalId().asString(), ex.getLocalizedMessage())); + String message = BundleUtil.getStringFromBundle("pids.deletePid.failureOther", + Arrays.asList(dataset.getGlobalId().asString(), ex.getLocalizedMessage())); logger.info(message); throw new IllegalCommandException(message, this); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java index 41093444360..877f3b81d7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.RoleAssignment; @@ -15,6 +16,8 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.search.IndexResponse; import java.util.ArrayList; import java.util.Collections; @@ -22,7 +25,7 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; + import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import java.io.IOException; import java.util.concurrent.Future; @@ -99,18 +102,21 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { } if (!doomed.isHarvested()) { - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); - try { - if (idServiceBean.alreadyRegistered(doomed)) { - idServiceBean.deleteIdentifier(doomed); - for (DataFile df : doomed.getFiles()) { - idServiceBean.deleteIdentifier(df); + GlobalId pid = doomed.getGlobalId(); + if (pid != null) { + PidProvider pidProvider = PidUtil.getPidProvider(pid.getProviderId()); + try { + if (pidProvider.alreadyRegistered(doomed)) { + pidProvider.deleteIdentifier(doomed); + for (DataFile df : doomed.getFiles()) { + pidProvider.deleteIdentifier(df); + } } + } catch (Exception e) { + logger.log(Level.WARNING, "Identifier deletion was not successful:", e.getMessage()); } - } catch (Exception e) { - logger.log(Level.WARNING, "Identifier deletion was not successful:", e.getMessage()); } - } + } toReIndex = managedDoomed.getOwner(); diff --git 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index f5e70209744..37aeee231e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.DatasetVersionUser; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.Embargo; import edu.harvard.iq.dataverse.UserNotification; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -19,6 +20,8 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.export.ExportService; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -29,17 +32,15 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; + import edu.harvard.iq.dataverse.batch.util.LoggingUtil; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.util.FileUtil; import java.util.ArrayList; import java.util.concurrent.Future; import org.apache.solr.client.solrj.SolrServerException; -import jakarta.ejb.EJB; -import jakarta.inject.Inject; - /** * @@ -117,9 +118,37 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // is this the first publication of the dataset? if (theDataset.getPublicationDate() == null) { theDataset.setReleaseUser((AuthenticatedUser) getUser()); - } - if ( theDataset.getPublicationDate() == null ) { + theDataset.setPublicationDate(new Timestamp(new Date().getTime())); + + // if there are any embargoed files in this version, we will save + // the latest availability date as the "embargoCitationDate" for future + // reference (if the files are not available yet, as of publishing of + // the dataset, this date will be used as the "citation date" of the dataset, + // instead of the publicationDate, in compliance with the DataCite + // best practices). + // the code below replicates the logic that used to be in the method + // Dataset.getCitationDate() that calculated this adjusted date in real time. + + Timestamp latestEmbargoDate = null; + for (DataFile dataFile : theDataset.getFiles()) { + // this is the first version of the dataset that is being published. + // therefore we can iterate through .getFiles() instead of obtaining + // the DataFiles by going through the FileMetadatas in the current version.
+ Embargo embargo = dataFile.getEmbargo(); + if (embargo != null) { + // "dataAvailable" is not nullable in the Embargo class, no need for a null check + Timestamp embargoDate = Timestamp.valueOf(embargo.getDateAvailable().atStartOfDay()); + if (latestEmbargoDate == null || latestEmbargoDate.compareTo(embargoDate) < 0) { + latestEmbargoDate = embargoDate; + } + } + } + // the above loop could be easily replaced with a database query; + // but we iterate through .getFiles() elsewhere in the command, when + // updating and/or registering the files, so it should not result in + // an extra performance hit. + theDataset.setEmbargoCitationDate(latestEmbargoDate); } //Clear any external status @@ -321,7 +350,8 @@ private void validateDataFiles(Dataset dataset, CommandContext ctxt) throws Comm // (the decision was made to validate all the files on every // major release; we can revisit the decision if there's any // indication that this makes publishing take significantly longer. - if (maxFileSize == -1 || dataFile.getFilesize() < maxFileSize) { + String driverId = FileUtil.getStorageDriver(dataFile); + if(StorageIO.isDataverseAccessible(driverId) && (maxFileSize == -1 || dataFile.getFilesize() < maxFileSize)) { FileUtil.validateDataFileChecksum(dataFile); } else { @@ -356,56 +386,52 @@ private void validateDataFiles(Dataset dataset, CommandContext ctxt) throws Comm } private void publicizeExternalIdentifier(Dataset dataset, CommandContext ctxt) throws CommandException { - String protocol = getDataset().getProtocol(); - String authority = getDataset().getAuthority(); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(protocol, ctxt); - - if (idServiceBean != null) { - - try { - String currentGlobalIdProtocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, ""); - String currentGlobalAuthority = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, ""); - String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); - boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner()); - // We will skip trying to register the global identifiers for datafiles - // if "dependent" file-level identifiers are requested, AND the naming - // protocol, or the authority of the dataset global id is different from - // what's currently configured for the Dataverse. In other words - // we can't get "dependent" DOIs assigned to files in a dataset - // with the registered id that is a handle; or even a DOI, but in - // an authority that's different from what's currently configured. - // Additionaly in 4.9.3 we have added a system variable to disable - // registering file PIDs on the installation level. 
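The embargo loop added to FinalizeDatasetPublicationCommand above is, in effect, a maximum over the availability dates of the embargoed files. A compact equivalent, offered only as a sketch (it assumes Embargo.getDateAvailable() returns a java.time.LocalDate, which the atStartOfDay() call in the hunk implies):

    import edu.harvard.iq.dataverse.DataFile;
    import edu.harvard.iq.dataverse.Embargo;
    import java.sql.Timestamp;
    import java.util.Comparator;
    import java.util.List;
    import java.util.Objects;

    class EmbargoCitationDateSketch {
        // Latest embargo availability date across the dataset's files, or null if none are embargoed.
        static Timestamp latestEmbargoDate(List<DataFile> files) {
            return files.stream()
                    .map(DataFile::getEmbargo)
                    .filter(Objects::nonNull)
                    .map(e -> Timestamp.valueOf(e.getDateAvailable().atStartOfDay()))
                    .max(Comparator.naturalOrder())
                    .orElse(null);
        }
    }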
- if (((currentGlobalIdProtocol.equals(protocol) && currentGlobalAuthority.equals(authority)) - || dataFilePIDFormat.equals("INDEPENDENT")) - && isFilePIDsEnabled - && dataset.getLatestVersion().getMinorVersionNumber() != null - && dataset.getLatestVersion().getMinorVersionNumber().equals((long) 0)) { - //A false return value indicates a failure in calling the service - for (DataFile df : dataset.getFiles()) { - logger.log(Level.FINE, "registering global id for file {0}", df.getId()); - //A false return value indicates a failure in calling the service - if (!idServiceBean.publicizeIdentifier(df)) { - throw new Exception(); - } - df.setGlobalIdCreateTime(getTimestamp()); - df.setIdentifierRegistered(true); + PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(dataset); + try { + // We will skip trying to register the global identifiers for datafiles + // if "dependent" file-level identifiers are requested, AND the naming + // protocol, or the authority of the dataset global id is different from + // what's currently configured for the Dataverse. In other words + // we can't get "dependent" DOIs assigned to files in a dataset + // with the registered id that is a handle; or even a DOI, but in + // an authority that's different from what's currently configured. + // Additionaly in 4.9.3 we have added a system variable to disable + // registering file PIDs on the installation level. + boolean registerGlobalIdsForFiles = ctxt.systemConfig().isFilePIDsEnabledForCollection( + getDataset().getOwner()) + && pidProvider.canCreatePidsLike(dataset.getGlobalId()); + + if (registerGlobalIdsForFiles + && dataset.getLatestVersion().getMinorVersionNumber() != null + && dataset.getLatestVersion().getMinorVersionNumber().equals((long) 0)) { + // A false return value indicates a failure in calling the service + for (DataFile df : dataset.getFiles()) { + logger.log(Level.FINE, "registering global id for file {0}", df.getId()); + // A false return value indicates a failure in calling the service + if (!pidProvider.publicizeIdentifier(df)) { + throw new Exception(); } + df.setGlobalIdCreateTime(getTimestamp()); + df.setIdentifierRegistered(true); } - if (!idServiceBean.publicizeIdentifier(dataset)) { - throw new Exception(); - } - dataset.setGlobalIdCreateTime(new Date()); // TODO these two methods should be in the responsibility of the idServiceBean. - dataset.setIdentifierRegistered(true); - } catch (Throwable e) { - logger.warning("Failed to register the identifier "+dataset.getGlobalId().asString()+", or to register a file in the dataset; notifying the user(s), unlocking the dataset"); - - // Send failure notification to the user: - notifyUsersDatasetPublishStatus(ctxt, dataset, UserNotification.Type.PUBLISHFAILED_PIDREG); - - ctxt.datasets().removeDatasetLocks(dataset, DatasetLock.Reason.finalizePublication); - throw new CommandException(BundleUtil.getStringFromBundle("dataset.publish.error", idServiceBean.getProviderInformation()), this); } + if (!pidProvider.publicizeIdentifier(dataset)) { + throw new Exception(); + } + dataset.setGlobalIdCreateTime(new Date()); // TODO these two methods should be in the responsibility of the + // pidProvider. 
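Also worth noting in the publicizeExternalIdentifier rewrite above: the old three-way comparison against the :Protocol, :Authority and :DataFilePIDFormat settings is collapsed into a single provider-side call, canCreatePidsLike(datasetPid). A sketch of the resulting decision, using only calls that appear in this hunk (variable names are illustrative):

    // Should this publication run also register PIDs for the individual files?
    boolean registerGlobalIdsForFiles =
            ctxt.systemConfig().isFilePIDsEnabledForCollection(dataset.getOwner()) // per-collection switch
            && pidProvider.canCreatePidsLike(dataset.getGlobalId())                // provider can mint "dependent" file PIDs
            && dataset.getLatestVersion().getMinorVersionNumber() != null
            && dataset.getLatestVersion().getMinorVersionNumber().equals(0L);      // only on major releases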
+ dataset.setIdentifierRegistered(true); + } catch (Throwable e) { + logger.warning("Failed to register the identifier " + dataset.getGlobalId().asString() + + ", or to register a file in the dataset; notifying the user(s), unlocking the dataset"); + + // Send failure notification to the user: + notifyUsersDatasetPublishStatus(ctxt, dataset, UserNotification.Type.PUBLISHFAILED_PIDREG); + + ctxt.datasets().removeDatasetLocks(dataset, DatasetLock.Reason.finalizePublication); + throw new CommandException( + BundleUtil.getStringFromBundle("dataset.publish.error", pidProvider.getProviderInformation()), + this); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java new file mode 100644 index 00000000000..49f14e7c280 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java @@ -0,0 +1,51 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + +/** + * + * @author landreev + * The command doesn't do much. Its sole purpose is to check the permissions + * when it's called by the /api/dataverses/.../storage/quota API. + */ +// @RequiredPermissions - none defined, dynamic +public class GetCollectionQuotaCommand extends AbstractCommand<Long> { + + private static final Logger logger = Logger.getLogger(GetCollectionQuotaCommand.class.getCanonicalName()); + + private final Dataverse dataverse; + + public GetCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + dataverse = target; + } + + @Override + public Long execute(CommandContext ctxt) throws CommandException { + + if (dataverse != null && dataverse.getStorageQuota() != null) { + return dataverse.getStorageQuota().getAllocation(); + } + + return null; + } + + @Override + public Map<String, Set<Permission>> getRequiredPermissions() { + return Collections.singletonMap("", + dataverse.isReleased() ?
Collections.emptySet() + : Collections.singleton(Permission.ViewUnpublishedDataverse)); + } +} + + diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java new file mode 100644 index 00000000000..c30a5a34a81 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java @@ -0,0 +1,45 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import java.util.logging.Logger; + +/** + * + * @author landreev + */ +@RequiredPermissions(Permission.ManageDataversePermissions) +// alternatively, we could make it dynamic - public for published collections +// and Permission.ViewUnpublishedDataverse required otherwise (?) +public class GetCollectionStorageUseCommand extends AbstractCommand { + + private static final Logger logger = Logger.getLogger(GetCollectionStorageUseCommand.class.getCanonicalName()); + + private final Dataverse collection; + + public GetCollectionStorageUseCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + collection = target; + } + + @Override + public Long execute(CommandContext ctxt) throws CommandException { + + if (collection == null) { + throw new CommandException("null collection passed to get storage use command", this); + } + return ctxt.storageUse().findStorageSizeByDvContainerId(collection.getId()); + } + + /*@Override + public Map> getRequiredPermissions() { + return Collections.singletonMap("", + dataverse.isReleased() ? 
Collections.emptySet() + : Collections.singleton(Permission.ViewUnpublishedDataverse)); + }*/ +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDataFileCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDataFileCommand.java index fdf47bbd2dd..369f3cbfda6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDataFileCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDataFileCommand.java @@ -11,35 +11,34 @@ import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + import java.util.Collections; import java.util.Map; import java.util.Set; /** - * * @author Matthew */ // no annotations here, since permissions are dynamically decided // based off GetDatasetCommand for similar permissions checking public class GetDataFileCommand extends AbstractCommand { - private final DataFile df; + private final DataFile dataFile; - public GetDataFileCommand(DataverseRequest aRequest, DataFile anAffectedDataset) { - super(aRequest, anAffectedDataset); - df = anAffectedDataset; + public GetDataFileCommand(DataverseRequest aRequest, DataFile dataFile) { + super(aRequest, dataFile); + this.dataFile = dataFile; } @Override public DataFile execute(CommandContext ctxt) throws CommandException { - return df; + return dataFile; } @Override public Map> getRequiredPermissions() { return Collections.singletonMap("", - df.isReleased() ? Collections.emptySet() - : Collections.singleton(Permission.ViewUnpublishedDataset)); + dataFile.isReleased() ? Collections.emptySet() + : Collections.singleton(Permission.ViewUnpublishedDataset)); } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java new file mode 100644 index 00000000000..2d5e1251614 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java @@ -0,0 +1,38 @@ + +package edu.harvard.iq.dataverse.engine.command.impl; + + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; + + +import java.util.logging.Logger; + +/** + * + * @author stephenkraffmiller + */ +@RequiredPermissions(Permission.AddDataset) +public class GetDatasetSchemaCommand extends AbstractCommand { + + private static final Logger logger = Logger.getLogger(GetDatasetSchemaCommand.class.getCanonicalName()); + + private final Dataverse dataverse; + + public GetDatasetSchemaCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + dataverse = target; + } + + @Override + public String execute(CommandContext ctxt) throws CommandException { + return ctxt.dataverses().getCollectionDatasetSchema(dataverse.getAlias()); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetStorageSizeCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetStorageSizeCommand.java index f1f27fdcee2..09b33c4efc4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetStorageSizeCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetStorageSizeCommand.java @@ -7,7 +7,6 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; @@ -15,6 +14,7 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.util.BundleUtil; + import java.io.IOException; import java.util.Collections; import java.util.Map; @@ -38,7 +38,7 @@ public class GetDatasetStorageSizeCommand extends AbstractCommand { public enum Mode { STORAGE, DOWNLOAD - }; + } public GetDatasetStorageSizeCommand(DataverseRequest aRequest, Dataset target) { super(aRequest, target); @@ -58,21 +58,20 @@ public GetDatasetStorageSizeCommand(DataverseRequest aRequest, Dataset target, b @Override public Long execute(CommandContext ctxt) throws CommandException { - logger.fine("getDataverseStorageSize called on " + dataset.getDisplayName()); - if (dataset == null) { // should never happen - must indicate some data corruption in the database throw new CommandException(BundleUtil.getStringFromBundle("datasets.api.listing.error"), this); } + logger.fine("getDataverseStorageSize called on " + dataset.getDisplayName()); + try { return ctxt.datasets().findStorageSize(dataset, countCachedFiles, mode, version); } catch (IOException ex) { throw new CommandException(BundleUtil.getStringFromBundle("datasets.api.datasize.ioerror"), this); } - } - + @Override public Map> getRequiredPermissions() { // for data file check permission on owning dataset diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftFileMetadataIfAvailableCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftFileMetadataIfAvailableCommand.java index 14999548b34..1d83f0dd1f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftFileMetadataIfAvailableCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftFileMetadataIfAvailableCommand.java @@ -1,8 +1,6 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; @@ -12,25 +10,19 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; /** - * * @author Matthew */ -@RequiredPermissions( Permission.ViewUnpublishedDataset ) -public class GetDraftFileMetadataIfAvailableCommand extends AbstractCommand{ - private final DataFile df; +@RequiredPermissions(Permission.ViewUnpublishedDataset) +public class GetDraftFileMetadataIfAvailableCommand extends AbstractCommand { + private final DataFile dataFile; - public GetDraftFileMetadataIfAvailableCommand(DataverseRequest aRequest, DataFile dataFile) { - super(aRequest, dataFile); - df = dataFile; + public 
GetDraftFileMetadataIfAvailableCommand(DataverseRequest request, DataFile dataFile) { + super(request, dataFile); + this.dataFile = dataFile; } @Override public FileMetadata execute(CommandContext ctxt) throws CommandException { - FileMetadata fm = df.getLatestFileMetadata(); - if(fm.getDatasetVersion().getVersionState().equals(DatasetVersion.VersionState.DRAFT)) { - return df.getLatestFileMetadata(); - } - return null; + return dataFile.getDraftFileMetadata(); } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java index 680a5c3aaef..7bcc851bde2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java @@ -17,29 +17,32 @@ /** * Get the latest version of a dataset a user can view. + * * @author Naomi */ // No permission needed to view published dvObjects @RequiredPermissions({}) -public class GetLatestAccessibleDatasetVersionCommand extends AbstractCommand{ +public class GetLatestAccessibleDatasetVersionCommand extends AbstractCommand { private final Dataset ds; + private final boolean includeDeaccessioned; + private boolean checkPerms; public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) { + this(aRequest, anAffectedDataset, false, false); + } + + public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPerms) { super(aRequest, anAffectedDataset); ds = anAffectedDataset; + this.includeDeaccessioned = includeDeaccessioned; + this.checkPerms = checkPerms; } @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - if (ds.getLatestVersion().isDraft() && ctxt.permissions().requestOn(getRequest(), ds).has(Permission.ViewUnpublishedDataset)) { return ctxt.engine().submit(new GetDraftDatasetVersionCommand(getRequest(), ds)); } - - return ctxt.engine().submit(new GetLatestPublishedDatasetVersionCommand(getRequest(), ds)); - + return ctxt.engine().submit(new GetLatestPublishedDatasetVersionCommand(getRequest(), ds, includeDeaccessioned, checkPerms)); } - - - } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleFileMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleFileMetadataCommand.java new file mode 100644 index 00000000000..05f3c73bde0 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleFileMetadataCommand.java @@ -0,0 +1,30 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +public class GetLatestAccessibleFileMetadataCommand extends AbstractGetPublishedFileMetadataCommand { + + public GetLatestAccessibleFileMetadataCommand(DataverseRequest request, DataFile dataFile, boolean includeDeaccessioned) { + super(request, dataFile, includeDeaccessioned); + } + + @Override + public FileMetadata 
execute(CommandContext ctxt) throws CommandException { + FileMetadata fileMetadata = null; + + if (ctxt.permissions().requestOn(getRequest(), dataFile.getOwner()).has(Permission.ViewUnpublishedDataset)) { + fileMetadata = dataFile.getDraftFileMetadata(); + } + + if (fileMetadata == null) { + fileMetadata = getLatestPublishedFileMetadata(ctxt); + } + + return fileMetadata; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java index 18adff2e55c..0afcbe2d0bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; @@ -9,26 +10,56 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; /** - * * @author Naomi */ // No permission needed to view published dvObjects @RequiredPermissions({}) -public class GetLatestPublishedDatasetVersionCommand extends AbstractCommand{ +public class GetLatestPublishedDatasetVersionCommand extends AbstractCommand { private final Dataset ds; - + private final boolean includeDeaccessioned; + private final boolean checkPermsWhenDeaccessioned; + public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) { + this(aRequest, anAffectedDataset, false, false); + } + + public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned) { super(aRequest, anAffectedDataset); ds = anAffectedDataset; + this.includeDeaccessioned = includeDeaccessioned; + this.checkPermsWhenDeaccessioned = checkPermsWhenDeaccessioned; } + /* + * This command depending on the requested parameters will return: + * + * If the user requested to include a deaccessioned dataset with the files, the command will return the deaccessioned version if the user has permissions to view the files. Otherwise, it will return null. + * If the user requested to include a deaccessioned dataset but did not request the files, the command will return the deaccessioned version. + * If the user did not request to include a deaccessioned dataset, the command will return the latest published version. 
+ * + */ @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - for (DatasetVersion dsv: ds.getVersions()) { - if (dsv.isReleased()) { - return dsv; - } + DatasetVersion dsVersionResult = getReleaseOrDeaccessionedDatasetVersion(); + if (dsVersionResult != null && userHasPermissionsOnDatasetVersion(dsVersionResult, checkPermsWhenDeaccessioned, ctxt, ds)) { + return dsVersionResult; + } + return null; + } + + private DatasetVersion getReleaseOrDeaccessionedDatasetVersion() { + for (DatasetVersion dsVersion : ds.getVersions()) { + if (dsVersion.isReleased() || (includeDeaccessioned && dsVersion.isDeaccessioned())) { + return dsVersion; } + } return null; + } + + private boolean userHasPermissionsOnDatasetVersion(DatasetVersion dsVersionResult, boolean checkPermsWhenDeaccessioned, CommandContext ctxt, Dataset ds) { + if (dsVersionResult.isDeaccessioned() && checkPermsWhenDeaccessioned) { + return ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset); } - } \ No newline at end of file + return true; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedFileMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedFileMetadataCommand.java new file mode 100644 index 00000000000..fc13dba1a34 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedFileMetadataCommand.java @@ -0,0 +1,19 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +public class GetLatestPublishedFileMetadataCommand extends AbstractGetPublishedFileMetadataCommand { + + public GetLatestPublishedFileMetadataCommand(DataverseRequest request, DataFile dataFile, boolean includeDeaccessioned) { + super(request, dataFile, includeDeaccessioned); + } + + @Override + public FileMetadata execute(CommandContext ctxt) throws CommandException { + return getLatestPublishedFileMetadata(ctxt); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java index 2de2adff099..b98cd70a4da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java @@ -9,12 +9,12 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.IOException; import java.io.InputStream; import java.util.logging.Logger; -import jakarta.json.Json; import jakarta.json.JsonObject; -import jakarta.json.JsonReader; @RequiredPermissions(Permission.EditDataset) public class GetProvJsonCommand extends AbstractCommand { @@ -35,13 +35,13 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { try { StorageIO dataAccess = dataFile.getStorageIO(); - InputStream inputStream = dataAccess.getAuxFileAsInputStream(provJsonExtension); - JsonObject jsonObject = null; - if(null != inputStream) { - JsonReader 
jsonReader = Json.createReader(inputStream); - jsonObject = jsonReader.readObject(); + try (InputStream inputStream = dataAccess.getAuxFileAsInputStream(provJsonExtension)) { + JsonObject jsonObject = null; + if (null != inputStream) { + jsonObject = JsonUtil.getJsonObject(inputStream); + } + return jsonObject; } - return jsonObject; } catch (IOException ex) { String error = "Exception caught in DataAccess.getStorageIO(dataFile) getting file. Error: " + ex; throw new IllegalCommandException(error, this); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java index 3efb38e4a91..07256f057e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; @@ -15,27 +16,45 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; /** - * * @author Naomi */ // No permission needed to view published dvObjects @RequiredPermissions({}) -public class GetSpecificPublishedDatasetVersionCommand extends AbstractCommand{ +public class GetSpecificPublishedDatasetVersionCommand extends AbstractCommand { private final Dataset ds; private final long majorVersion; private final long minorVersion; - + private boolean includeDeaccessioned; + private boolean checkPerms; + public GetSpecificPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, long majorVersionNum, long minorVersionNum) { + this(aRequest, anAffectedDataset, majorVersionNum, minorVersionNum, false, false); + } + + + + public GetSpecificPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, long majorVersionNum, long minorVersionNum, boolean includeDeaccessioned, boolean checkPerms) { super(aRequest, anAffectedDataset); ds = anAffectedDataset; majorVersion = majorVersionNum; minorVersion = minorVersionNum; + this.includeDeaccessioned = includeDeaccessioned; + this.checkPerms = checkPerms; } + @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - for (DatasetVersion dsv: ds.getVersions()) { - if (dsv.isReleased()) { + + for (DatasetVersion dsv : ds.getVersions()) { + if (dsv.isReleased() || (includeDeaccessioned && dsv.isDeaccessioned())) { + + if(dsv.isDeaccessioned() && checkPerms){ + if(!ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset)){ + return null; + } + } + if (dsv.getVersionNumber().equals(majorVersion) && dsv.getMinorVersionNumber().equals(minorVersion)) { return dsv; } @@ -43,5 +62,5 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { } return null; } - -} \ No newline at end of file + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedFileMetadataByDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedFileMetadataByDatasetVersionCommand.java new file mode 100644 index 00000000000..deffbfb57ee 
--- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedFileMetadataByDatasetVersionCommand.java @@ -0,0 +1,34 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +public class GetSpecificPublishedFileMetadataByDatasetVersionCommand extends AbstractGetPublishedFileMetadataCommand { + private final long majorVersion; + private final long minorVersion; + + public GetSpecificPublishedFileMetadataByDatasetVersionCommand(DataverseRequest request, DataFile dataFile, long majorVersion, long minorVersion, boolean includeDeaccessioned) { + super(request, dataFile, includeDeaccessioned); + this.majorVersion = majorVersion; + this.minorVersion = minorVersion; + } + + @Override + public FileMetadata execute(CommandContext ctxt) throws CommandException { + return dataFile.getFileMetadatas().stream() + .filter(fileMetadata -> isRequestedVersionFileMetadata(fileMetadata, ctxt)) + .findFirst() + .orElse(null); + } + + private boolean isRequestedVersionFileMetadata(FileMetadata fileMetadata, CommandContext ctxt) { + DatasetVersion datasetVersion = fileMetadata.getDatasetVersion(); + return isDatasetVersionAccessible(datasetVersion, dataFile.getOwner(), ctxt) + && datasetVersion.getVersionNumber().equals(majorVersion) + && datasetVersion.getMinorVersionNumber().equals(minorVersion); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetUserTracesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetUserTracesCommand.java index e41d70d9804..df0b5d785e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetUserTracesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetUserTracesCommand.java @@ -212,7 +212,7 @@ public JsonObjectBuilder execute(CommandContext ctxt) throws CommandException { try { JsonObjectBuilder gbe = Json.createObjectBuilder() .add("id", guestbookResponse.getId()) - .add("downloadType", guestbookResponse.getDownloadtype()) + .add("eventType", guestbookResponse.getEventType()) .add("filename", guestbookResponse.getDataFile().getCurrentName()) .add("date", guestbookResponse.getResponseDate()) .add("guestbookName", guestbookResponse.getGuestbook().getName()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GrantSuperuserStatusCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GrantSuperuserStatusCommand.java index 42af43b7247..ec8c8976260 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GrantSuperuserStatusCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GrantSuperuserStatusCommand.java @@ -14,7 +14,7 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java index 478272950bd..772c989264c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java @@ -1,14 +1,14 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; -import edu.harvard.iq.dataverse.pidproviders.FakePidProviderServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import java.io.IOException; import java.util.Collections; @@ -80,9 +80,9 @@ protected void additionalParameterTests(CommandContext ctxt) throws CommandExcep * Dataverse) but aren't findable to be used. That could be the case if, for * example, someone was importing a draft dataset from elsewhere. */ - GlobalIdServiceBean globalIdServiceBean = GlobalIdServiceBean.getBean(ds.getProtocol(), ctxt); - if (globalIdServiceBean != null) { - if (globalIdServiceBean.alreadyRegistered(ds.getGlobalId(), true)) { + PidProvider pidProvider = PidUtil.getPidProvider(ds.getGlobalId().getProviderId()); + if (pidProvider != null) { + if (pidProvider.alreadyRegistered(ds.getGlobalId(), true)) { return; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java index c03c77d42fd..9a75f437b66 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java @@ -83,7 +83,7 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { // We probably want package files to be able to use specific stores instead. // More importantly perhaps, the approach above does not take into account // if the dataset may have an AlternativePersistentIdentifier, that may be - // designated isStorageLocationDesignator() - i.e., if a different identifer + // designated isStorageLocationDesignator() - i.e., if a different identifier // needs to be used to name the storage directory, instead of the main/current // persistent identifier above. 
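In the ImportDatasetCommand hunk above, the same provider lookup is followed by a findability check before an externally minted PID is accepted. The fragment below merely restates that guard as a sketch; exception handling is omitted exactly as it is in the visible lines, and the meaning of the boolean argument is taken from the hunk only (it is passed as true during import):

    // Accept the imported PID silently if the provider already knows about it.
    PidProvider pidProvider = PidUtil.getPidProvider(ds.getGlobalId().getProviderId());
    if (pidProvider != null && pidProvider.alreadyRegistered(ds.getGlobalId(), true)) {
        return;
    }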
if (!isValidDirectory(directory)) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java index 1858ba377ab..b619d32cc7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java @@ -6,16 +6,18 @@ import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Collections; /** * * @author michael */ -@RequiredPermissions( Permission.ManageDataversePermissions ) +// no annotations here, since permissions are dynamically decided public class ListRoleAssignments extends AbstractCommand<List<RoleAssignment>> { private final DvObject definitionPoint; @@ -34,5 +36,12 @@ public List<RoleAssignment> execute(CommandContext ctxt) throws CommandException } return ctxt.permissions().assignmentsOn(definitionPoint); } + + @Override + public Map<String, Set<Permission>> getRequiredPermissions() { + return Collections.singletonMap("", + definitionPoint.isInstanceofDataset() ? Collections.singleton(Permission.ManageDatasetPermissions) + : Collections.singleton(Permission.ManageDataversePermissions)); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java index 51283f29156..b93833ffdf9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListVersionsCommand.java @@ -14,6 +14,7 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import java.util.LinkedList; import java.util.List; @@ -23,23 +24,57 @@ */ // No permission needed to view published dvObjects @RequiredPermissions({}) -public class ListVersionsCommand extends AbstractCommand<List<DatasetVersion>>{ - +public class ListVersionsCommand extends AbstractCommand<List<DatasetVersion>> { + private final Dataset ds; + private final Integer limit; + private final Integer offset; + private final Boolean deepLookup; - public ListVersionsCommand(DataverseRequest aRequest, Dataset aDataset) { - super(aRequest, aDataset); - ds = aDataset; - } + public ListVersionsCommand(DataverseRequest aRequest, Dataset aDataset) { + this(aRequest, aDataset, null, null); + } + + public ListVersionsCommand(DataverseRequest aRequest, Dataset aDataset, Integer offset, Integer limit) { + this(aRequest, aDataset, offset, limit, false); + } + + public ListVersionsCommand(DataverseRequest aRequest, Dataset aDataset, Integer offset, Integer limit, boolean deepLookup) { + super(aRequest, aDataset); + ds = aDataset; + this.offset = offset; + this.limit = limit; + this.deepLookup = deepLookup; + } - @Override - public List<DatasetVersion> execute(CommandContext ctxt) throws CommandException { - List<DatasetVersion> outputList = new LinkedList<>(); - for ( DatasetVersion dsv : ds.getVersions() ) { - if
(dsv.isReleased() || ctxt.permissions().request( getRequest() ).on(ds).has(Permission.EditDataset)) { - outputList.add(dsv); + @Override + public List execute(CommandContext ctxt) throws CommandException { + + boolean includeUnpublished = ctxt.permissions().request(getRequest()).on(ds).has(Permission.EditDataset); + + if (offset == null && limit == null) { + + List outputList = new LinkedList<>(); + for (DatasetVersion dsv : ds.getVersions()) { + if (dsv.isReleased() || includeUnpublished) { + if (deepLookup) { + // @todo: when "deep"/extended lookup is requested, and + // we call .findDeep() to look up each version again, + // there is probably a more economical way to obtain the + // numeric ids of the versions, by a direct single query, + // rather than go through ds.getVersions() like we are now. + dsv = ctxt.datasetVersion().findDeep(dsv.getId()); + if (dsv == null) { + throw new CommandExecutionException("Failed to look up full list of dataset versions", this); + } + } + outputList.add(dsv); + } } - } - return outputList; - } + return outputList; + } else { + // Only a partial list (one "page"-worth) of versions is being requested + return ctxt.datasetVersion().findVersions(ds.getId(), offset, limit, includeUnpublished); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java index f5ef121dee2..6b95f3b6de1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java @@ -2,7 +2,6 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetLock; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.Command; @@ -11,6 +10,7 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -130,24 +130,15 @@ public PublishDatasetResult execute(CommandContext ctxt) throws CommandException // ... // Additionaly in 4.9.3 we have added a system variable to disable // registering file PIDs on the installation level. 
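The ListVersionsCommand changes above add offset/limit paging and an optional "deep" per-version lookup on top of the original list-everything behavior. A usage sketch of the three constructors, assuming a request, a dataset and a submitting context are at hand; the engine().submit(...) call is the same pattern used by GetLatestAccessibleDatasetVersionCommand earlier in this diff:

    // Full list (original behavior), one page of 10 versions starting at offset 20,
    // and the same page with the per-version deep lookup enabled.
    List<DatasetVersion> all  = ctxt.engine().submit(new ListVersionsCommand(req, dataset));
    List<DatasetVersion> page = ctxt.engine().submit(new ListVersionsCommand(req, dataset, 20, 10));
    List<DatasetVersion> deep = ctxt.engine().submit(new ListVersionsCommand(req, dataset, 20, 10, true));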
- String currentGlobalIdProtocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, ""); - String currentGlobalAuthority= ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, ""); - String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); boolean registerGlobalIdsForFiles = - (currentGlobalIdProtocol.equals(theDataset.getProtocol()) || dataFilePIDFormat.equals("INDEPENDENT")) - && ctxt.systemConfig().isFilePIDsEnabledForCollection(theDataset.getOwner()); - - if ( registerGlobalIdsForFiles ){ - registerGlobalIdsForFiles = currentGlobalAuthority.equals( theDataset.getAuthority() ); - } + ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner()) && + ctxt.dvObjects().getEffectivePidGenerator(getDataset()).canCreatePidsLike(getDataset().getGlobalId()); boolean validatePhysicalFiles = ctxt.systemConfig().isDatafileValidationOnPublishEnabled(); // As of v5.0, publishing a dataset is always done asynchronously, // with the dataset locked for the duration of the operation. - //if ((registerGlobalIdsForFiles || validatePhysicalFiles) - // && theDataset.getFiles().size() > ctxt.systemConfig().getPIDAsynchRegFileCount()) { String info = "Publishing the dataset; "; info += registerGlobalIdsForFiles ? "Registering PIDs for Datafiles; " : ""; @@ -178,15 +169,6 @@ public PublishDatasetResult execute(CommandContext ctxt) throws CommandException // method: //ctxt.datasets().callFinalizePublishCommandAsynchronously(theDataset.getId(), ctxt, request, datasetExternallyReleased); return new PublishDatasetResult(theDataset, Status.Inprogress); - - /** - * Code for for "synchronous" (while-you-wait) publishing - * is preserved below, commented out: - } else { - // Synchronous publishing (no workflow involved) - theDataset = ctxt.engine().submit(new FinalizeDatasetPublicationCommand(theDataset, getRequest(),datasetExternallyReleased)); - return new PublishDatasetResult(theDataset, Status.Completed); - } */ } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java index 779bc7fb7fe..7b80871a1e0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java @@ -4,20 +4,17 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; + import java.sql.Timestamp; import java.util.Date; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.HandlenetServiceBean; -import edu.harvard.iq.dataverse.batch.util.LoggingUtil; -import java.io.IOException; -import org.apache.solr.client.solrj.SolrServerException; /** * @@ -44,48 +41,37 @@ public 
RegisterDvObjectCommand(DataverseRequest aRequest, DvObject target, Boole @Override protected void executeImpl(CommandContext ctxt) throws CommandException { + DvObjectContainer container = (target instanceof DvObjectContainer) ? (DvObjectContainer) target : target.getOwner(); + // Get the pidProvider that is configured to mint new IDs + PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(container); if(this.migrateHandle){ //Only continue if you can successfully migrate the handle - if (!processMigrateHandle(ctxt)) return; + if (HandlePidProvider.HDL_PROTOCOL.equals(pidProvider.getProtocol()) || !processMigrateHandle(ctxt)) return; } - String nonNullDefaultIfKeyNotFound = ""; - String protocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - String authority = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - // Get the idServiceBean that is configured to mint new IDs - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(protocol, ctxt); + try { //Test to see if identifier already present //if so, leave. if (target.getIdentifier() == null || target.getIdentifier().isEmpty()) { - if (target.isInstanceofDataset()) { - target.setIdentifier(idServiceBean.generateDatasetIdentifier((Dataset) target)); - - } else { - target.setIdentifier(idServiceBean.generateDataFileIdentifier((DataFile) target)); - } - if (target.getProtocol() == null) { - target.setProtocol(protocol); - } - if (target.getAuthority() == null) { - target.setAuthority(authority); - } + pidProvider.generatePid(target); } - if (idServiceBean.alreadyRegistered(target)) { + + if (pidProvider.alreadyRegistered(target)) { return; } - String doiRetString = idServiceBean.createIdentifier(target); + String doiRetString = pidProvider.createIdentifier(target); if (doiRetString != null && doiRetString.contains(target.getIdentifier())) { - if (!idServiceBean.registerWhenPublished()) { + if (!pidProvider.registerWhenPublished()) { // Should register ID before publicize() is called - // For example, DOIEZIdServiceBean tries to recreate the id if the identifier isn't registered before + // For example, DOIEZIdProvider tries to recreate the id if the identifier isn't registered before // publicizeIdentifier is called target.setIdentifierRegistered(true); target.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); } if (target.isReleased()) { - idServiceBean.publicizeIdentifier(target); + pidProvider.publicizeIdentifier(target); } - if (idServiceBean.registerWhenPublished() && target.isReleased()) { + if (pidProvider.registerWhenPublished() && target.isReleased()) { target.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); target.setIdentifierRegistered(true); } @@ -95,27 +81,21 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { Dataset dataset = (Dataset) target; for (DataFile df : dataset.getFiles()) { if (df.getIdentifier() == null || df.getIdentifier().isEmpty()) { - df.setIdentifier(idServiceBean.generateDataFileIdentifier(df)); - if (df.getProtocol() == null || df.getProtocol().isEmpty()) { - df.setProtocol(protocol); - } - if (df.getAuthority() == null || df.getAuthority().isEmpty()) { - df.setAuthority(authority); - } + pidProvider.generatePid(df); } - doiRetString = idServiceBean.createIdentifier(df); + doiRetString = pidProvider.createIdentifier(df); if (doiRetString != null && doiRetString.contains(df.getIdentifier())) { - if (!idServiceBean.registerWhenPublished()) { + 
if (!pidProvider.registerWhenPublished()) { // Should register ID before publicize() is called - // For example, DOIEZIdServiceBean tries to recreate the id if the identifier isn't registered before + // For example, DOIEZIdProvider tries to recreate the id if the identifier isn't registered before // publicizeIdentifier is called df.setIdentifierRegistered(true); df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); } if (df.isReleased()) { - idServiceBean.publicizeIdentifier(df); + pidProvider.publicizeIdentifier(df); } - if (idServiceBean.registerWhenPublished() && df.isReleased()) { + if (pidProvider.registerWhenPublished() && df.isReleased()) { df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); df.setIdentifierRegistered(true); } @@ -145,7 +125,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { private Boolean processMigrateHandle (CommandContext ctxt){ boolean retval = true; if(!target.isInstanceofDataset()) return false; - if(!target.getProtocol().equals(HandlenetServiceBean.HDL_PROTOCOL)) return false; + if(!target.getProtocol().equals(HandlePidProvider.HDL_PROTOCOL)) return false; AlternativePersistentIdentifier api = new AlternativePersistentIdentifier(); api.setProtocol(target.getProtocol()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestAccessCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestAccessCommand.java index b87b9a73aa5..bf291427341 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestAccessCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestAccessCommand.java @@ -5,7 +5,13 @@ */ package edu.harvard.iq.dataverse.engine.command.impl; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Logger; + import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.FileAccessRequest; +import edu.harvard.iq.dataverse.GuestbookResponse; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; @@ -22,25 +28,39 @@ */ @RequiredPermissions({}) public class RequestAccessCommand extends AbstractCommand { - + + private static final Logger logger = Logger.getLogger(RequestAccessCommand.class.getName()); + private final DataFile file; private final AuthenticatedUser requester; + private final FileAccessRequest fileAccessRequest; private final Boolean sendNotification; - public RequestAccessCommand(DataverseRequest dvRequest, DataFile file) { // for data file check permission on owning dataset - super(dvRequest, file); - this.file = file; + this(dvRequest, file, false); + } + + public RequestAccessCommand(DataverseRequest dvRequest, DataFile file, Boolean sendNotification) { + // for data file check permission on owning dataset + super(dvRequest, file); + this.file = file; this.requester = (AuthenticatedUser) dvRequest.getUser(); - this.sendNotification = false; + this.fileAccessRequest = new FileAccessRequest(file, requester); + this.sendNotification = sendNotification; + } + + public RequestAccessCommand(DataverseRequest dvRequest, DataFile file, GuestbookResponse gbr) { + this(dvRequest, file, gbr, false); } - - public RequestAccessCommand(DataverseRequest dvRequest, DataFile file, Boolean sendNotification) { + + public RequestAccessCommand(DataverseRequest dvRequest, DataFile file, GuestbookResponse gbr, + Boolean sendNotification) { // for data file 
check permission on owning dataset - super(dvRequest, file); - this.file = file; + super(dvRequest, file); + this.file = file; this.requester = (AuthenticatedUser) dvRequest.getUser(); + this.fileAccessRequest = new FileAccessRequest(file, requester, gbr); this.sendNotification = sendNotification; } @@ -50,21 +70,36 @@ public DataFile execute(CommandContext ctxt) throws CommandException { if (!file.getOwner().isFileAccessRequest()) { throw new CommandException(BundleUtil.getStringFromBundle("file.requestAccess.notAllowed"), this); } - - //if user already has permission to download file or the file is public throw command exception - if (!file.isRestricted() || ctxt.permissions().requestOn(this.getRequest(), file).has(Permission.DownloadFile)) { - throw new CommandException(BundleUtil.getStringFromBundle("file.requestAccess.notAllowed.alreadyHasDownloadPermisssion"), this); + + // if user already has permission to download file or the file is public throw + // command exception + logger.fine("User: " + this.getRequest().getAuthenticatedUser().getName()); + logger.fine("File: " + file.getId() + " : restricted?: " + file.isRestricted()); + logger.fine( + "permission?: " + ctxt.permissions().requestOn(this.getRequest(), file).has(Permission.DownloadFile)); + if (!file.isRestricted() + || ctxt.permissions().requestOn(this.getRequest(), file).has(Permission.DownloadFile)) { + throw new CommandException( + BundleUtil.getStringFromBundle("file.requestAccess.notAllowed.alreadyHasDownloadPermisssion"), + this); } - if(FileUtil.isActivelyEmbargoed(file)) { + if (FileUtil.isActivelyEmbargoed(file)) { throw new CommandException(BundleUtil.getStringFromBundle("file.requestAccess.notAllowed.embargoed"), this); } - file.addFileAccessRequester(requester); + file.addFileAccessRequest(fileAccessRequest); + List fars = requester.getFileAccessRequests(); + if(fars!=null) { + fars.add(fileAccessRequest); + } else { + requester.setFileAccessRequests(Arrays.asList(fileAccessRequest)); + } + DataFile savedFile = ctxt.files().save(file); if (sendNotification) { - ctxt.fileDownload().sendRequestFileAccessNotification(this.file, requester); + logger.fine("ctxt.fileDownload().sendRequestFileAccessNotification(savedFile, requester);"); + ctxt.fileDownload().sendRequestFileAccessNotification(savedFile.getOwner(), savedFile.getId(), requester); } - return ctxt.files().save(file); + return savedFile; } } - diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java index 6b2872f3397..b7e3ddd8ce6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java @@ -1,7 +1,6 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; @@ -11,6 +10,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import 
edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.Arrays; @@ -41,11 +42,10 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { this, Collections.singleton(Permission.EditDataset), dataset); } - String nonNullDefaultIfKeyNotFound = ""; - String protocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(protocol, ctxt); + PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(dataset); + try { - String returnString = idServiceBean.createIdentifier(dataset); + String returnString = pidProvider.createIdentifier(dataset); logger.fine(returnString); // No errors caught, so mark PID as reserved. dataset.setGlobalIdCreateTime(new Date()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java index caf37ad4de1..f3b33f82524 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java @@ -25,6 +25,11 @@ public class ReturnDatasetToAuthorCommand extends AbstractDatasetCommand 0) { + ctxt.storageUse().incrementStorageSizeRecursively(uningest.getOwner().getId(), (0L - archivalFileSize)); + } + } + @Override + public boolean onSuccess(CommandContext ctxt, Object r) { + + return true; + } private void resetIngestStats(DataFile uningest, CommandContext ctxt){ diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java index 1f5989c9e08..5a0ae7cbf5d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java @@ -10,10 +10,12 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; + import java.sql.Timestamp; import java.util.Collections; import java.util.Date; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; /** * @@ -36,15 +38,15 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { throw new PermissionException("Update Target URL can only be called by superusers.", this, Collections.singleton(Permission.EditDataset), target); } - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(target.getProtocol(), ctxt); + PidProvider pidProvider = PidUtil.getPidProvider(target.getGlobalId().getProviderId()); try { - String doiRetString = idServiceBean.modifyIdentifierTargetURL(target); + String doiRetString = pidProvider.modifyIdentifierTargetURL(target); if (doiRetString != null && doiRetString.contains(target.getIdentifier())) { target.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); ctxt.em().merge(target); ctxt.em().flush(); for (DataFile df : target.getFiles()) { - doiRetString = idServiceBean.modifyIdentifierTargetURL(df); + doiRetString = pidProvider.modifyIdentifierTargetURL(df); if 
(doiRetString != null && doiRetString.contains(df.getIdentifier())) { df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); ctxt.em().merge(df); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java index 56c76f04c05..fe9415f39f9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java @@ -32,6 +32,8 @@ public class UpdateDataverseCommand extends AbstractCommand { private final List facetList; private final List featuredDataverseList; private final List inputLevelList; + + private boolean datasetsReindexRequired = false; public UpdateDataverseCommand(Dataverse editedDv, List facetList, List featuredDataverseList, DataverseRequest aRequest, List inputLevelList ) { @@ -74,9 +76,13 @@ public Dataverse execute(CommandContext ctxt) throws CommandException { } } - DataverseType oldDvType = ctxt.dataverses().find(editedDv.getId()).getDataverseType(); - String oldDvAlias = ctxt.dataverses().find(editedDv.getId()).getAlias(); - String oldDvName = ctxt.dataverses().find(editedDv.getId()).getName(); + Dataverse oldDv = ctxt.dataverses().find(editedDv.getId()); + + DataverseType oldDvType = oldDv.getDataverseType(); + String oldDvAlias = oldDv.getAlias(); + String oldDvName = oldDv.getName(); + oldDv = null; + Dataverse result = ctxt.dataverses().save(editedDv); if ( facetList != null ) { @@ -101,6 +107,14 @@ public Dataverse execute(CommandContext ctxt) throws CommandException { } } + // We don't want to reindex the children datasets unnecessarily: + // When these values are changed we need to reindex all children datasets + // This check is not recursive as all the values just report the immediate parent + if (!oldDvType.equals(editedDv.getDataverseType()) + || !oldDvName.equals(editedDv.getName()) + || !oldDvAlias.equals(editedDv.getAlias())) { + datasetsReindexRequired = true; + } return result; } @@ -110,9 +124,16 @@ public boolean onSuccess(CommandContext ctxt, Object r) { // first kick of async index of datasets // TODO: is this actually needed? Is there a better way to handle + // It appears that we at some point lost some extra logic here, where + // we only reindex the underlying datasets if one or more of the specific set + // of fields have been changed (since these values are included in the + // indexed solr documents for dataasets). So I'm putting that back. -L.A. 
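+ // (Summary of the restored behavior, for the reader: execute() above compares the previously
+ //  saved dataverse type, name, and alias against the edited values and sets
+ //  datasetsReindexRequired if any of them changed; the block below then reindexes the child
+ //  datasets only when that flag is set, rather than unconditionally on every update.)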
Dataverse result = (Dataverse) r; - List datasets = ctxt.datasets().findByOwnerId(result.getId()); - ctxt.index().asyncIndexDatasetList(datasets, true); + + if (datasetsReindexRequired) { + List datasets = ctxt.datasets().findByOwnerId(result.getId()); + ctxt.index().asyncIndexDatasetList(datasets, true); + } return ctxt.dataverses().index((Dataverse) r); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseThemeCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseThemeCommand.java index add7b825659..9ef9fed4b1b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseThemeCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseThemeCommand.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.ThemeWidgetFragment; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; @@ -22,7 +23,6 @@ public class UpdateDataverseThemeCommand extends AbstractCommand { private final Dataverse editedDv; private final File uploadedFile; - private final Path logoPath = Paths.get("../docroot/logos"); private String locate; public UpdateDataverseThemeCommand(Dataverse editedDv, File uploadedFile, DataverseRequest aRequest, String location) { @@ -44,7 +44,7 @@ public UpdateDataverseThemeCommand(Dataverse editedDv, File uploadedFile, Datave public Dataverse execute(CommandContext ctxt) throws CommandException { // Get current dataverse, so we can delete current logo file if necessary Dataverse currentDv = ctxt.dataverses().find(editedDv.getId()); - File logoFileDir = new File(logoPath.toFile(), editedDv.getId().toString()); + File logoFileDir = ThemeWidgetFragment.getLogoDir(editedDv.getId().toString()).toFile(); File currentFile=null; if (locate.equals("FOOTER")){ diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java index 7230f9f9c0a..0c463cddec1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java @@ -2,7 +2,6 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; @@ -11,6 +10,8 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import java.sql.Timestamp; @@ -46,31 +47,31 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { //the single dataset update api checks for drafts before calling the command return; } - GlobalIdServiceBean idServiceBean = 
GlobalIdServiceBean.getBean(target.getProtocol(), ctxt); + PidProvider pidProvider = PidUtil.getPidProvider(target.getGlobalId().getProviderId()); + try { - Boolean doiRetString = idServiceBean.publicizeIdentifier(target); + Boolean doiRetString = pidProvider.publicizeIdentifier(target); if (doiRetString) { target.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); ctxt.em().merge(target); ctxt.em().flush(); // When updating, we want to traverse through files even if the dataset itself // didn't need updating. - String currentGlobalIdProtocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, ""); - String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabledForCollection(target.getOwner()); // We will skip trying to update the global identifiers for datafiles if they // aren't being used. // If they are, we need to assure that there's an existing PID or, as when - // creating PIDs, that the protocol matches that of the dataset DOI if - // we're going to create a DEPENDENT file PID. - String protocol = target.getProtocol(); + // creating PIDs, that it's possible. + + boolean canCreatePidsForFiles = + isFilePIDsEnabled && ctxt.dvObjects().getEffectivePidGenerator(target).canCreatePidsLike(target.getGlobalId()); + for (DataFile df : target.getFiles()) { if (isFilePIDsEnabled && // using file PIDs and (!(df.getIdentifier() == null || df.getIdentifier().isEmpty()) || // identifier exists, or - currentGlobalIdProtocol.equals(protocol) || // right protocol to create dependent DOIs, or - dataFilePIDFormat.equals("INDEPENDENT"))// or independent. TODO(pm) - check authority too + canCreatePidsForFiles) // we can create PIDs for files ) { - doiRetString = idServiceBean.publicizeIdentifier(df); + doiRetString = pidProvider.publicizeIdentifier(df); if (doiRetString) { df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); ctxt.em().merge(df); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java new file mode 100644 index 00000000000..619740ddd89 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java @@ -0,0 +1,41 @@ + +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; + + +import java.util.logging.Logger; + +/** + * + * @author stephenkraffmiller + */ +@RequiredPermissions(Permission.AddDataset) +public class ValidateDatasetJsonCommand extends AbstractCommand { + + private static final Logger logger = Logger.getLogger(ValidateDatasetJsonCommand.class.getCanonicalName()); + + private final Dataverse dataverse; + private final String datasetJson; + + public ValidateDatasetJsonCommand(DataverseRequest aRequest, Dataverse target, String datasetJsonIn) { + super(aRequest, target); + dataverse = 
target; + datasetJson = datasetJsonIn; + } + + @Override + public String execute(CommandContext ctxt) throws CommandException { + + return ctxt.dataverses().isDatasetJsonValid(dataverse.getAlias(), datasetJson); + + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java index 5119b4b96c7..edd01ae98a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java @@ -545,6 +545,16 @@ private void createDataFileDDI(XMLStreamWriter xmlw, Set excludedFieldSe List vars = variableService.findByDataTableId(dt.getId()); if (checkField("catgry", excludedFieldSet, includedFieldSet)) { if (checkIsWithoutFrequencies(vars)) { + // @todo: the method called here to calculate frequencies + // when they are missing from the database (for whatever + // reasons) subsets the physical tab-delimited file and + // calculates them in real time. this is very expensive operation + // potentially. let's make sure that, when we do this, we + // save the resulting frequencies in the database, so that + // we don't have to do this again. Also, let's double check + // whether the "checkIsWithoutFrequencies()" method is doing + // the right thing - as it appears to return true when there + // are no categorical variables in the DataTable (?) calculateFrequencies(df, vars); } } @@ -580,6 +590,7 @@ private boolean checkIsWithoutFrequencies(List vars) { private void calculateFrequencies(DataFile df, List vars) { + // @todo: see the comment in the part of the code that calls this method try { DataConverter dc = new DataConverter(); File tabFile = dc.downloadFromStorageIO(df.getStorageIO()); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java b/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java index a7967f6ccb6..f0d77eb8b52 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java @@ -8,12 +8,11 @@ import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; - -import edu.harvard.iq.dataverse.DOIDataCiteRegisterService; import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DOIDataCiteRegisterService; import io.gdcc.spi.export.ExportDataProvider; import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JsonPrinter; diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 24449e8010a..9a689f7a4ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -188,7 +188,10 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); writeFullElement(xmlw, "subTitl", dto2Primitive(version, DatasetFieldConstant.subTitle)); - writeFullElement(xmlw, "altTitl", dto2Primitive(version, DatasetFieldConstant.alternativeTitle)); + FieldDTO altField = 
dto2FieldDTO( version, DatasetFieldConstant.alternativeTitle, "citation" ); + if (altField != null) { + writeMultipleElement(xmlw, "altTitl", altField, datasetDto.getMetadataLanguage()); + } xmlw.writeStartElement("IDNo"); writeAttribute(xmlw, "agency", persistentAgency); @@ -988,7 +991,11 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT // productionPlace was made multiple as of 5.14: // (a quick backward compatibility check was added to dto2PrimitiveList(), // see the method for details) - writeFullElementList(xmlw, "prodPlac", dto2PrimitiveList(version, DatasetFieldConstant.productionPlace)); + + FieldDTO prodPlac = dto2FieldDTO( version, DatasetFieldConstant.productionPlace, "citation" ); + if (prodPlac != null) { + writeMultipleElement(xmlw, "prodPlac", prodPlac, null); + } writeSoftwareElement(xmlw, version); writeGrantElement(xmlw, version); @@ -1811,11 +1818,13 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // labl if ((vm == null || !vm.containsKey("label"))) { - xmlw.writeStartElement("labl"); - writeAttribute(xmlw, "level", "variable"); - xmlw.writeCharacters(dvar.getString("label")); - xmlw.writeEndElement(); //labl - } else if (vm != null && vm.containsKey("label")) { + if(dvar.containsKey("label")) { + xmlw.writeStartElement("labl"); + writeAttribute(xmlw, "level", "variable"); + xmlw.writeCharacters(dvar.getString("label")); + xmlw.writeEndElement(); //labl + } + } else { xmlw.writeStartElement("labl"); writeAttribute(xmlw, "level", "variable"); xmlw.writeCharacters(vm.getString("label")); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index f7e75ca03fa..49ceabc5900 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -1,11 +1,7 @@ package edu.harvard.iq.dataverse.export.openaire; import java.io.OutputStream; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.logging.Logger; import jakarta.json.JsonObject; @@ -17,16 +13,16 @@ import com.google.gson.Gson; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.HandlenetServiceBean; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -193,10 +189,10 @@ public static void writeIdentifierElement(XMLStreamWriter xmlw, String identifie if (StringUtils.isNotBlank(identifier)) { Map identifier_map = new HashMap(); - if (StringUtils.containsIgnoreCase(identifier, DOIServiceBean.DOI_RESOLVER_URL)) { + if (StringUtils.containsIgnoreCase(identifier, AbstractDOIProvider.DOI_RESOLVER_URL)) { identifier_map.put("identifierType", "DOI"); identifier = 
StringUtils.substring(identifier, identifier.indexOf("10.")); - } else if (StringUtils.containsIgnoreCase(identifier, HandlenetServiceBean.HDL_RESOLVER_URL)) { + } else if (StringUtils.containsIgnoreCase(identifier, HandlePidProvider.HDL_RESOLVER_URL)) { identifier_map.put("identifierType", "Handle"); if (StringUtils.contains(identifier, "http")) { identifier = identifier.replace(identifier.substring(0, identifier.indexOf("/") + 2), ""); @@ -325,12 +321,34 @@ public static void writeTitlesElement(XMLStreamWriter xmlw, DatasetVersionDTO da String subtitle = dto2Primitive(datasetVersionDTO, DatasetFieldConstant.subTitle); title_check = writeTitleElement(xmlw, "Subtitle", subtitle, title_check, language); - String alternativeTitle = dto2Primitive(datasetVersionDTO, DatasetFieldConstant.alternativeTitle); - title_check = writeTitleElement(xmlw, "AlternativeTitle", alternativeTitle, title_check, language); - + title_check = writeMultipleTitleElement(xmlw, "AlternativeTitle", datasetVersionDTO, "citation", title_check, language); writeEndTag(xmlw, title_check); } + private static boolean writeMultipleTitleElement(XMLStreamWriter xmlw, String titleType, DatasetVersionDTO datasetVersionDTO, String metadataBlockName, boolean title_check, String language) throws XMLStreamException { + MetadataBlockDTO block = datasetVersionDTO.getMetadataBlocks().get(metadataBlockName); + if (block != null) { + logger.fine("Block is not empty"); + List fieldsBlock = block.getFields(); + if (fieldsBlock != null) { + for (FieldDTO fieldDTO : fieldsBlock) { + logger.fine(titleType + " " + fieldDTO.getTypeName()); + if (titleType.toLowerCase().equals(fieldDTO.getTypeName().toLowerCase())) { + logger.fine("Found Alt title"); + List fields = fieldDTO.getMultiplePrimitive(); + for (String value : fields) { + if (!writeTitleElement(xmlw, titleType, value, title_check, language)) + title_check = false; + } + break; + } + } + } + } + + return title_check; + } + /** * 3, Title (with optional type sub-properties) (M) * diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index a52679deebc..e7ae451cacf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -22,18 +22,16 @@ import java.util.logging.Level; import java.util.logging.Logger; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonNumber; import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonString; import jakarta.json.JsonValue; import jakarta.ws.rs.HttpMethod; import org.apache.commons.codec.binary.StringUtils; +import static edu.harvard.iq.dataverse.api.ApiConstants.DS_VERSION_LATEST; + /** * Handles an operation on a specific file. Requires a file id in order to be * instantiated. 
Applies logic based on an {@link ExternalTool} specification, @@ -41,15 +39,10 @@ */ public class ExternalToolHandler extends URLTokenUtil { - private final ExternalTool externalTool; + public final ExternalTool externalTool; private String requestMethod; - - public static final String HTTP_METHOD="httpMethod"; - public static final String TIMEOUT="timeOut"; - public static final String SIGNED_URL="signedUrl"; - public static final String NAME="name"; - public static final String URL_TEMPLATE="urlTemplate"; + /** @@ -110,7 +103,7 @@ public String handleRequest(boolean preview) { switch (externalTool.getScope()) { case DATASET: callback=SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" - + dataset.getId() + "/versions/:latest/toolparams/" + externalTool.getId(); + + dataset.getId() + "/versions/" + DS_VERSION_LATEST + "/toolparams/" + externalTool.getId(); break; case FILE: callback= SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/files/" @@ -134,12 +127,12 @@ public String handleRequest(boolean preview) { } else { // ToDo - if the allowedApiCalls() are defined, could/should we send them to - // tools using GET as well? + // tools using POST as well? if (requestMethod.equals(HttpMethod.POST)) { - String body = JsonUtil.prettyPrint(createPostBody(params).build()); + String body = JsonUtil.prettyPrint(createPostBody(params, null).build()); try { - logger.info("POST Body: " + body); + logger.fine("POST Body: " + body); return postFormData(body); } catch (IOException | InterruptedException ex) { Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); @@ -149,60 +142,6 @@ public String handleRequest(boolean preview) { return null; } - public JsonObject getParams(JsonObject toolParameters) { - //ToDo - why an array of object each with a single key/value pair instead of one object? - JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); - - // ToDo return json and print later - JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); - if (!(queryParams == null) && !queryParams.isEmpty()) { - queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { - queryParam.keySet().forEach((key) -> { - String value = queryParam.getString(key); - JsonValue param = getParam(value); - if (param != null) { - paramsBuilder.add(key, param); - } - }); - }); - } - return paramsBuilder.build(); - } - - public JsonObjectBuilder createPostBody(JsonObject params) { - JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); - bodyBuilder.add("queryParameters", params); - String apiCallStr = externalTool.getAllowedApiCalls(); - if (apiCallStr != null && !apiCallStr.isBlank()) { - JsonArray apiArray = JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()); - JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); - apiArray.getValuesAs(JsonObject.class).forEach(((apiObj) -> { - logger.fine(JsonUtil.prettyPrint(apiObj)); - String name = apiObj.getJsonString(NAME).getString(); - String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString(); - int timeout = apiObj.getInt(TIMEOUT); - String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString(); - logger.fine("URL Template: " + urlTemplate); - urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; - String apiPath = replaceTokensWithValues(urlTemplate); - logger.fine("URL WithTokens: " + apiPath); - String url = apiPath; - // Sign if apiToken exists, otherwise send unsigned URL (i.e. 
for guest users) - ApiToken apiToken = getApiToken(); - if (apiToken != null) { - url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), - httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") - + getApiToken().getTokenString()); - } - logger.fine("Signed URL: " + url); - apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod) - .add(SIGNED_URL, url).add(TIMEOUT, timeout)); - })); - bodyBuilder.add("signedUrls", apisBuilder); - } - return bodyBuilder; - } - private String postFormData(String allowedApis) throws IOException, InterruptedException { String url = null; HttpClient client = HttpClient.newHttpClient(); @@ -253,4 +192,11 @@ public String getExploreScript() { logger.fine("Exploring with " + toolUrl); return getScriptForUrl(toolUrl); } + + // TODO: Consider merging with getExploreScript + public String getConfigureScript() { + String toolUrl = this.getToolUrlWithQueryParams(); + logger.fine("Configuring with " + toolUrl); + return getScriptForUrl(toolUrl); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java index 877fc68e4a1..c93e2c6aa94 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java @@ -46,7 +46,7 @@ String getRefreshToken() { return refreshToken; } - ArrayList getOtherTokens() { + public ArrayList getOtherTokens() { return otherTokens; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java new file mode 100644 index 00000000000..7e555935e2e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java @@ -0,0 +1,38 @@ +package edu.harvard.iq.dataverse.globus; + +public class GlobusEndpoint { + + private String id; + private String clientToken; + private String basePath; + + public GlobusEndpoint(String id, String clientToken, String basePath) { + this.id = id; + this.clientToken = clientToken; + this.basePath = basePath; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getClientToken() { + return clientToken; + } + + public void setClientToken(String clientToken) { + this.clientToken = clientToken; + } + + public String getBasePath() { + return basePath; + } + + public void setBasePath(String basePath) { + this.basePath = basePath; + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index d2613422be9..3e60441850b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1,9 +1,11 @@ package edu.harvard.iq.dataverse.globus; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.Scheduler; import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; import edu.harvard.iq.dataverse.*; - import jakarta.ejb.Asynchronous; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; @@ -15,8 +17,13 @@ import jakarta.json.JsonArray; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; +import 
jakarta.json.JsonString; +import jakarta.json.JsonValue.ValueType; +import jakarta.json.stream.JsonParsingException; import jakarta.servlet.http.HttpServletRequest; +import jakarta.ws.rs.HttpMethod; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -29,6 +36,8 @@ import java.net.URLEncoder; import java.sql.Timestamp; import java.text.SimpleDateFormat; +import java.time.Duration; +import java.time.temporal.ChronoUnit; import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -40,17 +49,26 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.apache.commons.codec.binary.StringUtils; +import org.primefaces.PrimeFaces; + import com.google.gson.Gson; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; @Stateless @@ -59,197 +77,313 @@ public class GlobusServiceBean implements java.io.Serializable { @EJB protected DatasetServiceBean datasetSvc; - @EJB protected SettingsServiceBean settingsSvc; - @Inject DataverseSession session; - @EJB protected AuthenticationServiceBean authSvc; - @EJB EjbDataverseEngine commandEngine; - @EJB UserNotificationServiceBean userNotificationService; + @EJB + PrivateUrlServiceBean privateUrlService; + @EJB + FileDownloadServiceBean fileDownloadService; + @EJB + DataFileServiceBean dataFileService; private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); - private String code; - private String userTransferToken; - private String state; - - public String getState() { - return state; - } - - public void setState(String state) { - this.state = state; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getUserTransferToken() { - return userTransferToken; - } + private String getRuleId(GlobusEndpoint endpoint, String principal, String permissions) + throws MalformedURLException { - public void setUserTransferToken(String userTransferToken) { - this.userTransferToken = userTransferToken; - } + String principalType = "identity"; - ArrayList checkPermisions(AccessToken clientTokenUser, String directory, String globusEndpoint, - String principalType, String principal) throws MalformedURLException { - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access_list"); - MakeRequestResponse result = 
makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); - ArrayList ids = new ArrayList(); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list"); + MakeRequestResponse result = makeRequest(url, "Bearer", endpoint.getClientToken(), "GET", null); if (result.status == 200) { AccessList al = parseJson(result.jsonResponse, AccessList.class, false); for (int i = 0; i < al.getDATA().size(); i++) { Permissions pr = al.getDATA().get(i); - if ((pr.getPath().equals(directory + "/") || pr.getPath().equals(directory)) + + if ((pr.getPath().equals(endpoint.getBasePath() + "/") || pr.getPath().equals(endpoint.getBasePath())) && pr.getPrincipalType().equals(principalType) - && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))) { - ids.add(pr.getId()); + && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal))) + && pr.getPermissions().equals(permissions)) { + return pr.getId(); } else { - logger.info(pr.getPath() + " === " + directory + " == " + pr.getPrincipalType()); + logger.fine(pr.getPath() + " === " + endpoint.getBasePath() + " == " + pr.getPrincipalType()); continue; } } } - - return ids; + return null; } - public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) - throws MalformedURLException { - if (directory != null && !directory.equals("")) { - directory = directory + "/"; - } - logger.info("Start updating permissions." + " Directory is " + directory); - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, null); - logger.info("Size of rules " + rules.size()); - int count = 0; - while (count < rules.size()) { - logger.info("Start removing rules " + rules.get(count)); - Permissions permissions = new Permissions(); - permissions.setDATA_TYPE("access"); - permissions.setPermissions(perm); - permissions.setPath(directory); - - Gson gson = new GsonBuilder().create(); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" - + rules.get(count)); - logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" - + rules.get(count)); - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", gson.toJson(permissions)); - if (result.status != 200) { - logger.warning("Cannot update access rule " + rules.get(count)); - } else { - logger.info("Access rule " + rules.get(count) + " was updated"); - } - count++; - } - } - - public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { - + /** + * Call to delete a globus rule related to the specified dataset. 
+ * + * @param ruleId - Globus rule id - assumed to be associated with the + * dataset's file path (should not be called with a user + * specified rule id w/o further checking) + * @param datasetId - the id of the dataset associated with the rule + * @param globusLogger - a separate logger instance, may be null + */ + public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger) { + globusLogger.fine("Start deleting rule " + ruleId + " for dataset " + dataset.getId()); if (ruleId.length() > 0) { - AccessToken clientTokenUser = getClientToken(); - globusLogger.info("Start deleting permissions."); - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - - URL url = new URL( - "https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "DELETE", null); - if (result.status != 200) { - globusLogger.warning("Cannot delete access rule " + ruleId); - } else { - globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + if (dataset != null) { + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + if (endpoint != null) { + String accessToken = endpoint.getClientToken(); + globusLogger.info("Start deleting permissions."); + try { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + + "/access/" + ruleId); + MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "DELETE", null); + if (result.status != 200) { + globusLogger.warning("Cannot delete access rule " + ruleId); + } else { + globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + } + } catch (MalformedURLException ex) { + logger.log(Level.WARNING, + "Failed to delete access rule " + ruleId + " on endpoint " + endpoint.getId(), ex); + } + } } } - } - public int givePermission(String principalType, String principal, String perm, AccessToken clientTokenUser, - String directory, String globusEndpoint) throws MalformedURLException { + /** + * Request read/write access for the specified principal and generate a list of + * accessible paths for new files for the specified dataset. 
+ * + * @param principal - the id of the Globus principal doing the transfer + * @param dataset + * @param numberOfPaths - how many files are to be transferred + * @return + */ + public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int numberOfPaths) { - ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, principal); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + String principalType = "identity"; Permissions permissions = new Permissions(); permissions.setDATA_TYPE("access"); permissions.setPrincipalType(principalType); permissions.setPrincipal(principal); - permissions.setPath(directory + "/"); - permissions.setPermissions(perm); + permissions.setPath(endpoint.getBasePath() + "/"); + permissions.setPermissions("rw"); + + JsonObjectBuilder response = Json.createObjectBuilder(); + //Try to create the directory (202 status) if it does not exist (502-already exists) + int mkDirStatus = makeDirs(endpoint, dataset); + if (!(mkDirStatus== 202 || mkDirStatus == 502)) { + return response.add("status", mkDirStatus).build(); + } + //The dir for the dataset's data exists, so try to request permission for the principal + int requestPermStatus = requestPermission(endpoint, dataset, permissions); + response.add("status", requestPermStatus); + if (requestPermStatus == 201) { + String driverId = dataset.getEffectiveStorageDriverId(); + JsonObjectBuilder paths = Json.createObjectBuilder(); + for (int i = 0; i < numberOfPaths; i++) { + String storageIdentifier = DataAccess.getNewStorageIdentifier(driverId); + int lastIndex = Math.max(storageIdentifier.lastIndexOf("/"), storageIdentifier.lastIndexOf(":")); + paths.add(storageIdentifier, endpoint.getBasePath() + "/" + storageIdentifier.substring(lastIndex + 1)); + } + response.add("paths", paths.build()); + } + return response.build(); + } + + /** + * Call to create the directories for the specified dataset. + * + * @param dataset + * @return - an error status at whichever subdir the process fails at or the + * final success status + */ + private int makeDirs(GlobusEndpoint endpoint, Dataset dataset) { + logger.fine("Creating dirs: " + endpoint.getBasePath()); + int index = endpoint.getBasePath().lastIndexOf(dataset.getAuthorityForFileStorage()) + + dataset.getAuthorityForFileStorage().length(); + String nextDir = endpoint.getBasePath().substring(0, index); + int response = makeDir(endpoint, nextDir); + String identifier = dataset.getIdentifierForFileStorage(); + //Usually identifiers will have 0 or 1 slashes (e.g. 
FK2/ABCDEF) but the while loop will handle any that could have more + //Will skip if the first makeDir above failed + while ((identifier.length() > 0) && ((response == 202 || response == 502))) { + index = identifier.indexOf('/'); + if (index == -1) { + //Last dir to create + response = makeDir(endpoint, nextDir + "/" + identifier); + identifier = ""; + } else { + //The next dir to create + nextDir = nextDir + "/" + identifier.substring(0, index); + response = makeDir(endpoint, nextDir); + //The rest of the identifier + identifier = identifier.substring(index + 1); + } + } + return response; + } + + private int makeDir(GlobusEndpoint endpoint, String dir) { + MakeRequestResponse result = null; + String body = "{\"DATA_TYPE\":\"mkdir\",\"path\":\"" + dir + "\"}"; + try { + logger.fine(body); + URL url = new URL( + "https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint.getId() + "/mkdir"); + result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", body); + + switch (result.status) { + case 202: + logger.fine("Dir " + dir + " was created successfully."); + break; + case 502: + logger.fine("Dir " + dir + " already exists."); + break; + default: + logger.warning("Status " + result.status + " received when creating dir " + dir); + logger.fine("Response: " + result.jsonResponse); + } + } catch (MalformedURLException ex) { + // Misconfiguration + logger.warning("Failed to create dir on " + endpoint.getId()); + return 500; + } + return result.status; + } + + private int requestPermission(GlobusEndpoint endpoint, Dataset dataset, Permissions permissions) { Gson gson = new GsonBuilder().create(); MakeRequestResponse result = null; - if (rules.size() == 0) { - logger.info("Start creating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access"); - result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "POST", - gson.toJson(permissions)); + logger.fine("Start creating the rule"); - if (result.status == 400) { + try { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access"); + result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions)); + + switch (result.status) { + case 404: + logger.severe("Endpoint " + endpoint.getId() + " was not found"); + break; + case 400: logger.severe("Path " + permissions.getPath() + " is not valid"); - } else if (result.status == 409) { + break; + case 409: logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); + break; + case 201: + JsonObject globusResponse = JsonUtil.getJsonObject(result.jsonResponse); + if (globusResponse != null && globusResponse.containsKey("access_id")) { + permissions.setId(globusResponse.getString("access_id")); + monitorTemporaryPermissions(permissions.getId(), dataset.getId()); + logger.fine("Access rule " + permissions.getId() + " was created successfully"); + } else { + // Shouldn't happen! 
+ logger.warning("Access rule id not returned for dataset " + dataset.getId()); + } } - return result.status; - } else { - logger.info("Start Updating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" - + rules.get(0)); - result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", - gson.toJson(permissions)); - - if (result.status == 400) { - logger.severe("Path " + permissions.getPath() + " is not valid"); - } else if (result.status == 409) { - logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); - } - logger.info("Result status " + result.status); + } catch (MalformedURLException ex) { + // Misconfiguration + logger.warning("Failed to create access rule URL for " + endpoint.getId()); + return 500; } - - return result.status; } - public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { - - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId - + "/successful_transfers"); - - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + /** + * Given an array of remote files to be referenced in the dataset, create a set + * of valid storage identifiers and return a map of the remote file paths to + * storage identifiers. + * + * @param dataset + * @param referencedFiles - a JSON array of remote files to be referenced in the + * dataset - each should be a string with the /path/to/file + * @return - a map of supplied paths to valid storage identifiers + */ + public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) { + String driverId = dataset.getEffectiveStorageDriverId(); + JsonArray endpoints = GlobusAccessibleStore.getReferenceEndpointsWithPaths(driverId); + + JsonObjectBuilder fileMap = Json.createObjectBuilder(); + referencedFiles.forEach(value -> { + if (value.getValueType() != ValueType.STRING) { + throw new JsonParsingException("ReferencedFiles must be strings", null); + } + String referencedFile = ((JsonString) value).getString(); + boolean valid = false; + for (int i = 0; i < endpoints.size(); i++) { + if (referencedFile.startsWith(((JsonString) endpoints.get(i)).getString())) { + valid = true; + } + } + if (!valid) { + throw new IllegalArgumentException( + "Referenced file " + referencedFile + " is not in an allowed endpoint/path"); + } + String storageIdentifier = DataAccess.getNewStorageIdentifier(driverId); + fileMap.add(referencedFile, storageIdentifier + "//" + referencedFile); + }); + return fileMap.build(); + } - if (result.status == 200) { - logger.info(" SUCCESS ====== "); - return true; - } - return false; + /** + * A cache of temporary permission requests - for upload (rw) and download (r) + * access. When a temporary permission request is created, it is added to the + * cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started, + * the permission will be revoked/deleted. (If a transfer has been started, the + * permission will not be revoked/deleted until the transfer is complete. This + * is handled in other methods.) + */ + // ToDo - nominally this doesn't need to be as long as the allowed time for the + // downloadCache so there could be two separate settings. 
+ // Single cache of open rules/permission requests + private final Cache rulesCache = Caffeine.newBuilder() + .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) + .scheduler(Scheduler.systemScheduler()).evictionListener((ruleId, datasetId, cause) -> { + // Delete rules that expire + logger.fine("Rule " + ruleId + " expired"); + Dataset dataset = datasetSvc.find(datasetId); + deletePermission((String) ruleId, dataset, logger); + }) + + .build(); + + // Convenience method to add a temporary permission request to the cache - + // allows logging of temporary permission requests + private void monitorTemporaryPermissions(String ruleId, long datasetId) { + logger.fine("Adding rule " + ruleId + " for dataset " + datasetId); + rulesCache.put(ruleId, datasetId); } - public GlobusTask getTask(AccessToken clientTokenUser, String taskId, Logger globusLogger) throws MalformedURLException { + /** + * Call the Globus API to get info about the transfer. + * + * @param accessToken + * @param taskId - the Globus task id supplied by the user + * @param globusLogger - the transaction-specific logger to use (separate log + * files are created in general, some calls may use the + * class logger) + * @return + * @throws MalformedURLException + */ + public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "GET", null); GlobusTask task = null; @@ -264,49 +398,34 @@ public GlobusTask getTask(AccessToken clientTokenUser, String taskId, Logger glo return task; } - public AccessToken getClientToken() throws MalformedURLException { - String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - URL url = new URL( - "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); - - MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + /** + * Globus call to get an access token for the user using the long-term token we + * hold. 
+ * + * @param globusBasicToken - the base64 encoded Globus Basic token comprised of + * the : + * @return - a valid Globus access token + */ + public static AccessToken getClientToken(String globusBasicToken) { + URL url; AccessToken clientTokenUser = null; - if (result.status == 200) { - clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); - } - return clientTokenUser; - } - - public AccessToken getAccessToken(HttpServletRequest origRequest, String globusBasicToken) - throws UnsupportedEncodingException, MalformedURLException { - String serverName = origRequest.getServerName(); - if (serverName.equals("localhost")) { - logger.severe("Changing localhost to utoronto"); - serverName = "utl-192-123.library.utoronto.ca"; - } - - String redirectURL = "https://" + serverName + "/globus.xhtml"; - - redirectURL = URLEncoder.encode(redirectURL, "UTF-8"); - - URL url = new URL("https://auth.globus.org/v2/oauth2/token?code=" + code + "&redirect_uri=" + redirectURL - + "&grant_type=authorization_code"); - logger.info(url.toString()); - MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); - AccessToken accessTokenUser = null; + try { + url = new URL( + "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); - if (result.status == 200) { - logger.info("Access Token: \n" + result.toString()); - accessTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); - logger.info(accessTokenUser.getAccessToken()); + MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + if (result.status == 200) { + clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + } + } catch (MalformedURLException e) { + // On a statically defined URL... 
+ e.printStackTrace(); } - - return accessTokenUser; - + return clientTokenUser; } - public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, + private static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { String str = null; HttpURLConnection connection = null; @@ -314,8 +433,7 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode try { connection = (HttpURLConnection) url.openConnection(); // Basic - // NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 - logger.info(authType + " " + authCode); + logger.fine("For URL: " + url.toString()); connection.setRequestProperty("Authorization", authType + " " + authCode); // connection.setRequestProperty("Content-Type", // "application/x-www-form-urlencoded"); @@ -323,32 +441,30 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode if (jsonString != null) { connection.setRequestProperty("Content-Type", "application/json"); connection.setRequestProperty("Accept", "application/json"); - logger.info(jsonString); + logger.fine(jsonString); connection.setDoOutput(true); + OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); wr.write(jsonString); wr.flush(); } status = connection.getResponseCode(); - logger.info("Status now " + status); + logger.fine("Status now " + status); InputStream result = connection.getInputStream(); if (result != null) { - logger.info("Result is not null"); str = readResultJson(result).toString(); - logger.info("str is "); - logger.info(result.toString()); + logger.fine("str is " + result.toString()); } else { - logger.info("Result is null"); + logger.fine("Result is null"); str = null; } - logger.info("status: " + status); + logger.fine("status: " + status); } catch (IOException ex) { - logger.info("IO"); logger.severe(ex.getMessage()); - logger.info(ex.getCause().toString()); - logger.info(ex.getStackTrace().toString()); + logger.fine(ex.getCause().toString()); + logger.fine(ex.getStackTrace().toString()); } finally { if (connection != null) { connection.disconnect(); @@ -359,18 +475,16 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode } - private StringBuilder readResultJson(InputStream in) { + private static StringBuilder readResultJson(InputStream in) { StringBuilder sb = null; - try { - - BufferedReader br = new BufferedReader(new InputStreamReader(in)); + try (BufferedReader br = new BufferedReader(new InputStreamReader(in))) { sb = new StringBuilder(); String line; while ((line = br.readLine()) != null) { sb.append(line + "\n"); } br.close(); - logger.info(sb.toString()); + logger.fine(sb.toString()); } catch (IOException e) { sb = null; logger.severe(e.getMessage()); @@ -378,7 +492,7 @@ private StringBuilder readResultJson(InputStream in) { return sb; } - private T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { + private static T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { if (sb != null) { Gson gson = null; if (namingPolicy) { @@ -395,32 +509,7 @@ private T parseJson(String sb, Class jsonParserClass, boolean namingPolic } } - public String getDirectory(String datasetId) { - Dataset dataset = null; - String directory = null; - try { - dataset = datasetSvc.find(Long.parseLong(datasetId)); - if (dataset == null) { - logger.severe("Dataset not found " + datasetId); - return null; - } - String storeId = 
dataset.getStorageIdentifier(); - storeId.substring(storeId.indexOf("//") + 1); - directory = storeId.substring(storeId.indexOf("//") + 1); - logger.info(storeId); - logger.info(directory); - logger.info("Storage identifier:" + dataset.getIdentifierForFileStorage()); - return directory; - - } catch (NumberFormatException nfe) { - logger.severe(nfe.getMessage()); - - return null; - } - - } - - class MakeRequestResponse { + static class MakeRequestResponse { public String jsonResponse; public int status; @@ -431,81 +520,61 @@ class MakeRequestResponse { } - private MakeRequestResponse findDirectory(String directory, AccessToken clientTokenUser, String globusEndpoint) - throws MalformedURLException { - URL url = new URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/ls?path=" - + directory + "/"); - - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); - logger.info("find directory status:" + result.status); - - return result; + /** + * Cache of open download requests. This cache keeps track of the set of files + * selected for transfer out (download) via Globus. It is a means of + * transferring the list between the DatasetPage, where it is generated via user UI + * actions, and the Datasets/globusDownloadParameters API. + * + * Nominally, the dataverse-globus app will call that API endpoint and then + * /requestGlobusDownload, at which point the cached info is sent to the app. If + * the app doesn't call within 5 minutes (the time allowed to call + * /globusDownloadParameters) + GLOBUS_CACHE_MAXAGE minutes (a ~longer period + * giving the user time to make choices in the app), the cached info is deleted. + * + */ + private final Cache downloadCache = Caffeine.newBuilder() + .expireAfterWrite( + Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class) + 5, ChronoUnit.MINUTES)) + .scheduler(Scheduler.systemScheduler()).evictionListener((downloadId, datasetId, cause) -> { + // Delete downloads that expire + logger.fine("Download for " + downloadId + " expired"); + }) + + .build(); + + public JsonObject getFilesForDownload(String downloadId) { + return downloadCache.getIfPresent(downloadId); } - public boolean giveGlobusPublicPermissions(String datasetId) - throws UnsupportedEncodingException, MalformedURLException { + public int setPermissionForDownload(Dataset dataset, String principal) { + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + String principalType = "identity"; - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - if (globusEndpoint.equals("") || globusBasicToken.equals("")) { - return false; - } - AccessToken clientTokenUser = getClientToken(); - if (clientTokenUser == null) { - logger.severe("Cannot get client token "); - return false; - } - - String directory = getDirectory(datasetId); - logger.info(directory); - - MakeRequestResponse status = findDirectory(directory, clientTokenUser, globusEndpoint); - - if (status.status == 200) { - - /* - * FilesList fl = parseJson(status.jsonResponse, FilesList.class, false); - * ArrayList files = fl.getDATA(); if (files != null) { for (FileG file: - * files) { if (!file.getName().contains("cached") && - * !file.getName().contains(".thumb")) { int perStatus = - * givePermission("all_authenticated_users", "", "r", clientTokenUser, directory - * + "/" + file.getName(), 
globusEndpoint); logger.info("givePermission status " - + perStatus + " for " + file.getName()); if (perStatus == 409) { - * logger.info("Permissions already exist or limit was reached for " + - * file.getName()); } else if (perStatus == 400) { - * logger.info("No file in Globus " + file.getName()); } else if (perStatus != - * 201) { logger.info("Cannot get permission for " + file.getName()); } } } } - */ - - int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, directory, - globusEndpoint); - logger.info("givePermission status " + perStatus); - if (perStatus == 409) { - logger.info("Permissions already exist or limit was reached"); - } else if (perStatus == 400) { - logger.info("No directory in Globus"); - } else if (perStatus != 201 && perStatus != 200) { - logger.info("Cannot give read permission"); - return false; - } - - } else if (status.status == 404) { - logger.info("There is no globus directory"); - } else { - logger.severe("Cannot find directory in globus, status " + status); - return false; - } + Permissions permissions = new Permissions(); + permissions.setDATA_TYPE("access"); + permissions.setPrincipalType(principalType); + permissions.setPrincipal(principal); + permissions.setPath(endpoint.getBasePath() + "/"); + permissions.setPermissions("r"); - return true; + return requestPermission(endpoint, dataset, permissions); } - // Generates the URL to launch the Globus app + // Generates the URL to launch the Globus app for upload public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } - public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) { + /** + * Generates the app URL for upload (in) or download (out) + * + * @param d - the dataset involved + * @param upload - boolean, true for upload, false for download + * @param dataFiles - a list of the DataFiles to be downloaded + * @return the app URL, including request parameters and a signed callback + */ + public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List dataFiles) { String localeCode = session.getLocaleCode(); ApiToken apiToken = null; User user = session.getUser(); @@ -518,46 +587,53 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) apiToken = authSvc.generateApiTokenForUser((AuthenticatedUser) user); } } - String storePrefix = ""; String driverId = d.getEffectiveStorageDriverId(); try { - storePrefix = DataAccess.getDriverPrefix(driverId); } catch (Exception e) { logger.warning("GlobusAppUrlForDataset: Failed to get storePrefix for " + driverId); } - //Use URLTokenUtil for params currently in common with external tools. - URLTokenUtil tokenUtil = new URLTokenUtil(d, df, apiToken, localeCode); - String appUrl; + + // Use URLTokenUtil for params currently in common with external tools. 
+ URLTokenUtil tokenUtil = new URLTokenUtil(d, null, apiToken, localeCode); + String appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost"); + String callback = null; if (upload) { - appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + appUrl = appUrl + "/upload?dvLocale={localeCode}"; + callback = SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + d.getId() + + "/globusUploadParameters?locale=" + localeCode; } else { - if (df == null) { - appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/download?datasetPid={datasetPid}&siteUrl={siteUrl}" - + ((apiToken != null) ? "&apiToken={apiToken}" : "") - + "&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; - } else { - String rawStorageId = df.getStorageIdentifier(); - rawStorageId=rawStorageId.substring(rawStorageId.lastIndexOf(":")+1); - appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/download-file?datasetPid={datasetPid}&siteUrl={siteUrl}" - + ((apiToken != null) ? "&apiToken={apiToken}" : "") - + "&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}&fileId={fileId}&storageIdentifier=" - + rawStorageId + "&fileName=" + df.getCurrentName(); - } + // Download + JsonObject files = GlobusUtil.getFilesMap(dataFiles, d); + + String downloadId = UUID.randomUUID().toString(); + downloadCache.put(downloadId, files); + appUrl = appUrl + "/download?dvLocale={localeCode}"; + callback = SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + d.getId() + + "/globusDownloadParameters?locale=" + localeCode + "&downloadId=" + downloadId; + + } + if (apiToken != null) { + callback = UrlSignerUtil.signUrl(callback, 5, apiToken.getAuthenticatedUser().getUserIdentifier(), + HttpMethod.GET, + JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + apiToken.getTokenString()); + } else { + // Shouldn't happen + logger.warning("Unable to get api token for user: " + user.getIdentifier()); } - return tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix; + appUrl = appUrl + "&callback=" + Base64.getEncoder().encodeToString(StringUtils.getBytesUtf8(callback)); + + String finalUrl = tokenUtil.replaceTokensWithValues(appUrl); + logger.fine("Calling app: " + finalUrl); + return finalUrl; } - public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken) { - return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, null)); - + private String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List downloadDFList) { + return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, downloadDFList)); } - + @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, + public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { Integer countAll = 0; @@ -585,40 +661,34 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin globusLogger = logger; } - globusLogger.info("Starting an globusUpload "); - - String 
datasetIdentifier = dataset.getStorageIdentifier(); - - // ToDo - use DataAccess methods? - String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); - datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); - - Thread.sleep(5000); - - JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(jsonData)) { - jsonObject = Json.createReader(rdr).readObject(); - } catch (Exception jpe) { - jpe.printStackTrace(); - logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}"); - } - logger.info("json: " + JsonUtil.prettyPrint(jsonObject)); + logger.fine("json: " + JsonUtil.prettyPrint(jsonData)); - String taskIdentifier = jsonObject.getString("taskIdentifier"); + String taskIdentifier = jsonData.getString("taskIdentifier"); - String ruleId = ""; - try { - ruleId = jsonObject.getString("ruleId"); - } catch (NullPointerException npe) { - logger.warning("NPE for jsonData object"); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); + String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + logger.fine("Found rule: " + ruleId); + if (ruleId != null) { + Long datasetId = rulesCache.getIfPresent(ruleId); + if (datasetId != null) { + // Will not delete rule + rulesCache.invalidate(ruleId); + } } + // Wait before first check + Thread.sleep(5000); // globus task status check - GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); + task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - if (ruleId.length() > 0) { - deletePermision(ruleId, globusLogger); + globusLogger.info("Starting a globusUpload "); + + if (ruleId != null) { + // Transfer is complete, so delete rule + deletePermission(ruleId, dataset, globusLogger); + } // If success, switch to an EditInProgress lock - do this before removing the @@ -660,21 +730,30 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // List inputList = new ArrayList(); - JsonArray filesJsonArray = jsonObject.getJsonArray("files"); + JsonArray filesJsonArray = jsonData.getJsonArray("files"); if (filesJsonArray != null) { + String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" + + dataset.getIdentifierForFileStorage(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from // externalTool String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String[] bits = storageIdentifier.split(":"); - String bucketName = bits[1].replace("/", ""); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String storeId = parts[0]; + // If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + String bucketName = ""; + if (bits.length > 1) { + bucketName = bits[0]; + } String fileId = bits[bits.length - 1]; // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - String fullPath = storageType + bucketName + "/" + datasetIdentifier + "/" + fileId; + // or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId; String fileName = fileJsonObject.getString("fileName"); inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); @@ -683,7 +762,8 @@ public void globusUpload(String jsonData, ApiToken token, 
Dataset dataset, Strin // calculateMissingMetadataFields: checksum, mimetype JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - + logger.fine("Size: " + newfilesJsonArray.size()); + logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { @@ -691,29 +771,33 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin countAll++; String storageIdentifier = fileJsonObject.getString("storageIdentifier"); String fileName = fileJsonObject.getString("fileName"); - String directoryLabel = fileJsonObject.getString("directoryLabel"); - String[] bits = storageIdentifier.split(":"); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + // If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + if (bits.length > 1) { + } String fileId = bits[bits.length - 1]; List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) .filter(Objects::nonNull).collect(Collectors.toList()); - if (newfileJsonObject != null) { - if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { - JsonPatch path = Json.createPatchBuilder() - .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); - fileJsonObject = path.apply(fileJsonObject); - path = Json.createPatchBuilder() - .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); - fileJsonObject = path.apply(fileJsonObject); - jsonDataSecondAPI.add(fileJsonObject); - countSuccess++; - } else { - globusLogger.info(fileName - + " will be skipped from adding to dataset by second API due to missing values "); - countError++; - } + logger.fine("List Size: " + newfileJsonObject.size()); + // if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + JsonPatch path = Json.createPatchBuilder() + .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); + fileJsonObject = path.apply(fileJsonObject); + path = Json.createPatchBuilder() + .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); + fileJsonObject = path.apply(fileJsonObject); + jsonDataSecondAPI.add(fileJsonObject); + countSuccess++; + // } else { + // globusLogger.info(fileName + // + " will be skipped from adding to dataset by second API due to missing + // values "); + // countError++; + // } } else { globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); @@ -730,6 +814,9 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; System.out.println("*******====command ==== " + command); + // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of + // calling API + String output = addFilesAsync(command, globusLogger); if (output.equalsIgnoreCase("ok")) { // if(!taskSkippedFiles) @@ -756,10 +843,6 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin globusLogger.info("Files failures: " + countError.toString()); globusLogger.info("Finished upload via Globus job."); - if (fileHandlerSuceeded) { - fileHandler.close(); - } - } catch (Exception e) { logger.info("Exception from 
globusUpload call "); e.printStackTrace(); @@ -767,6 +850,13 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); } } + if (ruleId != null) { + deletePermission(ruleId, dataset, globusLogger); + globusLogger.info("Removed upload permission: " + ruleId); + } + if (fileHandlerSuceeded) { + fileHandler.close(); + } } public String addFilesAsync(String curlCommand, Logger globusLogger) @@ -808,17 +898,16 @@ private String addFiles(String curlCommand, Logger globusLogger) { sb.append(line); globusLogger.info(" API Output : " + sb.toString()); JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(sb.toString())) { - jsonObject = Json.createReader(rdr).readObject(); - } catch (Exception jpe) { - jpe.printStackTrace(); - globusLogger.log(Level.SEVERE, "Error parsing dataset json."); - } + jsonObject = JsonUtil.getJsonObject(sb.toString()); status = jsonObject.getString("status"); } catch (Exception ex) { - globusLogger.log(Level.SEVERE, - "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); + if (ex instanceof JsonParsingException) { + globusLogger.log(Level.SEVERE, "Error parsing dataset json."); + } else { + globusLogger.log(Level.SEVERE, + "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); + } } return status; @@ -850,31 +939,47 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro globusLogger = logger; } - globusLogger.info("Starting an globusDownload "); + globusLogger.info("Starting a globusDownload "); JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(jsonData)) { - jsonObject = Json.createReader(rdr).readObject(); + try { + jsonObject = JsonUtil.getJsonObject(jsonData); } catch (Exception jpe) { jpe.printStackTrace(); - globusLogger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}"); + globusLogger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}", jsonData); + // TODO: stop the process after this parsing exception. } String taskIdentifier = jsonObject.getString("taskIdentifier"); - String ruleId = ""; - try { - jsonObject.getString("ruleId"); - } catch (NullPointerException npe) { - - } + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + logger.fine("Endpoint path: " + endpoint.getBasePath()); + // If the rules_cache times out, the permission will be deleted. Presumably that + // doesn't affect a // globus task status check - GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); + GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); + String ruleId = getRuleId(endpoint, task.getOwner_id(), "r"); + if (ruleId != null) { + logger.fine("Found rule: " + ruleId); + Long datasetId = rulesCache.getIfPresent(ruleId); + if (datasetId != null) { + logger.fine("Deleting from cache: rule: " + ruleId); + // Will not delete rule + rulesCache.invalidate(ruleId); + } + } else { + // Something is wrong - the rule should be there (a race with the cache timing + // out?) 
+ logger.warning("ruleId not found for taskId: " + taskIdentifier); + } + task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - if (ruleId.length() > 0) { - deletePermision(ruleId, globusLogger); + // Transfer is done (success or failure) so delete the rule + if (ruleId != null) { + logger.fine("Deleting: rule: " + ruleId); + deletePermission(ruleId, dataset, globusLogger); } if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { @@ -899,18 +1004,18 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro Executor executor = Executors.newFixedThreadPool(10); - private GlobusTask globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { + private GlobusTask globusStatusCheck(GlobusEndpoint endpoint, String taskId, Logger globusLogger) + throws MalformedURLException { boolean taskCompletion = false; String status = ""; GlobusTask task = null; - int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 50); + int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( + settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 50); do { try { globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(pollingInterval * 1000); - AccessToken clientTokenUser = getClientToken(); - // success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); - task = getTask(clientTokenUser, taskId, globusLogger); + task = getTask(endpoint.getClientToken(), taskId, globusLogger); if (task != null) { status = task.getStatus(); if (status != null) { @@ -953,7 +1058,7 @@ private String getTaskStatus(GlobusTask task) { if (task != null) { status = task.getStatus(); if (status != null) { - // The task is in progress. 
+ // The task is in progress but is not ok or queued if (status.equalsIgnoreCase("ACTIVE")) { status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); } else { @@ -983,7 +1088,7 @@ public JsonObject calculateMissingMetadataFields(List inputList, Logger .collect(Collectors.toList()); }); - CompletableFuture completableFuture = allCompletableFuture.thenApply(files -> { + CompletableFuture completableFuture = allCompletableFuture.thenApply(files -> { return files.stream().map(d -> json(d)).collect(toJsonArray()); }); @@ -996,7 +1101,6 @@ public JsonObject calculateMissingMetadataFields(List inputList, Logger } private CompletableFuture calculateDetailsAsync(String id, Logger globusLogger) { - // logger.info(" calcualte additional details for these globus id ==== " + id); return CompletableFuture.supplyAsync(() -> { try { @@ -1024,7 +1128,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) String fullPath = id.split("IDsplit")[1]; String fileName = id.split("IDsplit")[2]; - // ToDo: what if the file doesnot exists in s3 + // ToDo: what if the file does not exist in s3 // ToDo: what if checksum calculation failed do { @@ -1035,9 +1139,9 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) count = 3; } catch (IOException ioex) { count = 3; - logger.info(ioex.getMessage()); - globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath - + ") does not appear to be an S3 object associated with driver: "); + logger.fine(ioex.getMessage()); + globusLogger.info( + "DataFile (fullPath " + fullPath + ") does not appear to be accessible within Dataverse: "); } catch (Exception ex) { count = count + 1; ex.printStackTrace(); @@ -1048,7 +1152,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) } while (count < 3); if (checksumVal.length() == 0) { - checksumVal = "NULL"; + checksumVal = "Not available in Dataverse"; } String mimeType = calculatemime(fileName); @@ -1064,7 +1168,7 @@ public String calculatemime(String fileName) throws InterruptedException { String finalType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; String type = FileUtil.determineFileTypeByNameAndExtension(fileName); - if (type!=null && !type.isBlank()) { + if (type != null && !type.isBlank()) { if (FileUtil.useRecognizedType(finalType, type)) { finalType = type; } @@ -1072,194 +1176,106 @@ public String calculatemime(String fileName) throws InterruptedException { return finalType; } - /* - * public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) - * throws MalformedURLException { - * - * logger.info("=====Tasklist == dataset id :" + dataset.getId()); String - * directory = null; - * - * try { - * - * List fileMetadatas = new ArrayList<>(); - * - * StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - * - * - * - * DatasetVersion workingVersion = dataset.getEditVersion(); - * - * if (workingVersion.getCreateTime() != null) { - * workingVersion.setCreateTime(new Timestamp(new Date().getTime())); } - * - * directory = dataset.getAuthorityForFileStorage() + "/" + - * dataset.getIdentifierForFileStorage(); - * - * System.out.println("======= directory ==== " + directory + - * " ==== datasetId :" + dataset.getId()); Map checksumMapOld - * = new HashMap<>(); - * - * Iterator fmIt = workingVersion.getFileMetadatas().iterator(); - * - * while (fmIt.hasNext()) { FileMetadata fm = fmIt.next(); if (fm.getDataFile() - * != null && fm.getDataFile().getId() != null) { String chksum = - * 
fm.getDataFile().getChecksumValue(); if (chksum != null) { - * checksumMapOld.put(chksum, 1); } } } - * - * List dFileList = new ArrayList<>(); boolean update = false; for - * (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { - * - * String s3ObjectKey = s3ObjectSummary.getKey(); - * - * - * String t = s3ObjectKey.replace(directory, ""); - * - * if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String - * filePath = s3ObjectKey; String fileName = - * filePath.split("/")[filePath.split("/").length - 1]; String fullPath = - * datasetSIO.getStorageLocation() + "/" + fileName; - * - * logger.info("Full path " + fullPath); StorageIO dataFileStorageIO = - * DataAccess.getDirectStorageIO(fullPath); InputStream in = - * dataFileStorageIO.getInputStream(); - * - * String checksumVal = FileUtil.calculateChecksum(in, - * DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); - * logger.info("The checksum is " + checksumVal); if - * ((checksumMapOld.get(checksumVal) != null)) { logger.info("datasetId :" + - * dataset.getId() + "======= filename ==== " + filePath + - * " == file already exists "); } else if (filePath.contains("cached") || - * filePath.contains(".thumb")) { logger.info(filePath + " is ignored"); } else - * { update = true; logger.info("datasetId :" + dataset.getId() + - * "======= filename ==== " + filePath + " == new file "); try { - * - * DataFile datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); - * //MIME_TYPE_GLOBUS datafile.setModificationTime(new Timestamp(new - * Date().getTime())); datafile.setCreateDate(new Timestamp(new - * Date().getTime())); datafile.setPermissionModificationTime(new Timestamp(new - * Date().getTime())); - * - * FileMetadata fmd = new FileMetadata(); - * - * - * fmd.setLabel(fileName); fmd.setDirectoryLabel(filePath.replace(directory, - * "").replace(File.separator + fileName, "")); - * - * fmd.setDataFile(datafile); - * - * datafile.getFileMetadatas().add(fmd); - * - * FileUtil.generateS3PackageStorageIdentifierForGlobus(datafile); - * logger.info("==== datasetId :" + dataset.getId() + "======= filename ==== " - * + filePath + " == added to datafile, filemetadata "); - * - * try { // We persist "SHA1" rather than "SHA-1". - * //datafile.setChecksumType(DataFile.ChecksumType.SHA1); - * datafile.setChecksumType(DataFile.ChecksumType.MD5); - * datafile.setChecksumValue(checksumVal); } catch (Exception cksumEx) { - * logger.info("==== datasetId :" + dataset.getId() + - * "======Could not calculate checksumType signature for the new file "); } - * - * datafile.setFilesize(totalSize); - * - * dFileList.add(datafile); - * - * } catch (Exception ioex) { logger.info("datasetId :" + dataset.getId() + - * "======Failed to process and/or save the file " + ioex.getMessage()); return - * false; - * - * } } } } if (update) { - * - * List filesAdded = new ArrayList<>(); - * - * if (dFileList != null && dFileList.size() > 0) { - * - * // Dataset dataset = version.getDataset(); - * - * for (DataFile dataFile : dFileList) { - * - * if (dataFile.getOwner() == null) { dataFile.setOwner(dataset); - * - * workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); - * dataFile.getFileMetadata().setDatasetVersion(workingVersion); - * dataset.getFiles().add(dataFile); - * - * } - * - * filesAdded.add(dataFile); - * - * } - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ===== Done! 
Finished saving new files to the dataset."); } - * - * fileMetadatas.clear(); for (DataFile addedFile : filesAdded) { - * fileMetadatas.add(addedFile.getFileMetadata()); } filesAdded = null; - * - * if (workingVersion.isDraft()) { - * - * logger.info("Async: ==== datasetId :" + dataset.getId() + - * " ==== inside draft version "); - * - * Timestamp updateTime = new Timestamp(new Date().getTime()); - * - * workingVersion.setLastUpdateTime(updateTime); - * dataset.setModificationTime(updateTime); - * - * - * for (FileMetadata fileMetadata : fileMetadatas) { - * - * if (fileMetadata.getDataFile().getCreateDate() == null) { - * fileMetadata.getDataFile().setCreateDate(updateTime); - * fileMetadata.getDataFile().setCreator((AuthenticatedUser) user); } - * fileMetadata.getDataFile().setModificationTime(updateTime); } - * - * - * } else { logger.info("datasetId :" + dataset.getId() + - * " ==== inside released version "); - * - * for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { for - * (FileMetadata fileMetadata : fileMetadatas) { if - * (fileMetadata.getDataFile().getStorageIdentifier() != null) { - * - * if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion. - * getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { - * workingVersion.getFileMetadatas().set(i, fileMetadata); } } } } - * - * - * } - * - * - * try { Command cmd; logger.info("Async: ==== datasetId :" + - * dataset.getId() + - * " ======= UpdateDatasetVersionCommand START in globus function "); cmd = new - * UpdateDatasetVersionCommand(dataset, new DataverseRequest(user, - * (HttpServletRequest) null)); ((UpdateDatasetVersionCommand) - * cmd).setValidateLenient(true); //new DataverseRequest(authenticatedUser, - * (HttpServletRequest) null) //dvRequestService.getDataverseRequest() - * commandEngine.submit(cmd); } catch (CommandException ex) { - * logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + - * "======CommandException updating DatasetVersion from batch job: " + - * ex.getMessage()); return false; } - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ======= GLOBUS CALL COMPLETED SUCCESSFULLY "); - * - * //return true; } - * - * } catch (Exception e) { String message = e.getMessage(); - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ======= GLOBUS CALL Exception ============== " + message); - * e.printStackTrace(); return false; //return - * error(Response.Status.INTERNAL_SERVER_ERROR, - * "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. 
Message was '" - * + message + "'."); } - * - * String globusBasicToken = - * settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - * AccessToken clientTokenUser = getClientToken(globusBasicToken); - * updatePermision(clientTokenUser, directory, "identity", "r"); return true; } - * - */ + + private GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { + Dataset dataset = null; + if (dvObject instanceof Dataset) { + dataset = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + dataset = (Dataset) dvObject.getOwner(); + } else { + throw new IllegalArgumentException("Unsupported DvObject type: " + dvObject.getClass().getName()); + } + String driverId = dataset.getEffectiveStorageDriverId(); + GlobusEndpoint endpoint = null; + + String directoryPath = GlobusAccessibleStore.getTransferPath(driverId); + + if (GlobusAccessibleStore.isDataverseManaged(driverId) && (dataset != null)) { + directoryPath = directoryPath + "/" + dataset.getAuthorityForFileStorage() + "/" + + dataset.getIdentifierForFileStorage(); + } else { + // remote store - may have path in file storageidentifier + String relPath = dvObject.getStorageIdentifier() + .substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + int filenameStart = relPath.lastIndexOf("/") + 1; + if (filenameStart > 0) { + directoryPath = directoryPath + relPath.substring(0, filenameStart); + } + } + logger.fine("directoryPath finally: " + directoryPath); + + String endpointId = GlobusAccessibleStore.getTransferEndpointId(driverId); + + logger.fine("endpointId: " + endpointId); + + String globusToken = GlobusAccessibleStore.getGlobusToken(driverId); + + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + String clientToken = accessToken.getOtherTokens().get(0).getAccessToken(); + endpoint = new GlobusEndpoint(endpointId, clientToken, directoryPath); + + return endpoint; + } + + // This helper method is called from the Download terms/guestbook/etc. popup, + // when the user clicks the "ok" button. We use it, instead of calling + // downloadServiceBean directly, in order to differentiate between single + // file downloads and multiple (batch) downloads - since both use the same + // terms/etc. popup. 
+ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, + boolean doNotSaveGuestbookResponse) { + PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); + guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); + + ApiToken apiToken = null; + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) user); + } else if (user instanceof PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privUrl.getToken()); + } + + DataFile df = guestbookResponse.getDataFile(); + if (df != null) { + logger.fine("Single datafile case for writeGuestbookAndStartTransfer"); + List downloadDFList = new ArrayList(1); + downloadDFList.add(df); + if (!doNotSaveGuestbookResponse) { + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + } + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, downloadDFList)); + } else { + // Following FileDownloadServiceBean writeGuestbookAndStartBatchDownload + List list = new ArrayList<>(Arrays.asList(guestbookResponse.getSelectedFileIds().split(","))); + List selectedFiles = new ArrayList(); + for (String idAsString : list) { + try { + Long fileId = Long.parseLong(idAsString); + // If we need to create a GuestBookResponse record, we have to + // look up the DataFile object for this file: + if (!doNotSaveGuestbookResponse) { + df = dataFileService.findCheapAndEasy(fileId); + guestbookResponse.setDataFile(df); + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + selectedFiles.add(df); + } + } catch (NumberFormatException nfe) { + logger.warning( + "A file id passed to the writeGuestbookAndStartTransfer method as a string could not be converted back to Long: " + + idAsString); + return; + } + + } + if (!selectedFiles.isEmpty()) { + // Use dataset from one file - files should all be from the same dataset + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, selectedFiles)); + } + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java new file mode 100644 index 00000000000..92cf8ac7704 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java @@ -0,0 +1,33 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.List; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; + +public class GlobusUtil { + + public static JsonObject getFilesMap(List dataFiles, Dataset d) { + JsonObjectBuilder filesBuilder = Json.createObjectBuilder(); + for (DataFile df : dataFiles) { + String storageId = df.getStorageIdentifier(); + String[] parts = DataAccess + .getDriverIdAndStorageLocation(DataAccess.getLocationFromStorageId(storageId, d)); + String driverId = parts[0]; + String fileLocation = parts[1]; + if (GlobusAccessibleStore.isDataverseManaged(driverId)) { + String endpointWithBasePath = GlobusAccessibleStore.getTransferEnpointWithPath(driverId); + fileLocation = endpointWithBasePath + "/" + fileLocation; + } else { + 
fileLocation = storageId.substring(storageId.lastIndexOf("//") + 2); + } + filesBuilder.add(df.getId().toString(), fileLocation); + } + return filesBuilder.build(); + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java index 1b4a7bc7db0..cc15d4c978b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java @@ -40,10 +40,6 @@ @Stateless @Named public class OAIRecordServiceBean implements java.io.Serializable { - @EJB - OAISetServiceBean oaiSetService; - @EJB - IndexServiceBean indexService; @EJB DatasetServiceBean datasetService; @EJB @@ -55,13 +51,24 @@ public class OAIRecordServiceBean implements java.io.Serializable { EntityManager em; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean"); - - public void updateOaiRecords(String setName, List datasetIds, Date updateTime, boolean doExport) { - updateOaiRecords(setName, datasetIds, updateTime, doExport, logger); - } - public void updateOaiRecords(String setName, List datasetIds, Date updateTime, boolean doExport, Logger setUpdateLogger) { - + /** + * Updates the OAI records for the set specified + * @param setName name of the OAI set + * @param datasetIds ids of the datasets that are candidates for this OAI set + * @param updateTime time stamp + * @param doExport attempt to export datasets that haven't been exported yet + * @param confirmed true if the datasetIds above were looked up in the database + * - as opposed to in the search engine. Meaning, that it is + * confirmed that any dataset not on this list that's currently + * in the set is no longer in the database and should be + * marked as deleted without any further checks. Otherwise + * we'll want to double-check if the dataset still exists + * as published. This is to prevent marking existing datasets + * as deleted during a full reindex etc. + * @param setUpdateLogger dedicated Logger + */ + public void updateOaiRecords(String setName, List datasetIds, Date updateTime, boolean doExport, boolean confirmed, Logger setUpdateLogger) { // create Map of OaiRecords List oaiRecords = findOaiRecordsBySetName(setName); Map recordMap = new HashMap<>(); @@ -101,9 +108,6 @@ public void updateOaiRecords(String setName, List datasetIds, Date updateT DatasetVersion releasedVersion = dataset.getReleasedVersion(); Date publicationDate = releasedVersion == null ? null : releasedVersion.getReleaseTime(); - //if (dataset.getPublicationDate() != null - // && (dataset.getLastExportTime() == null - // || dataset.getLastExportTime().before(dataset.getPublicationDate()))) { if (publicationDate != null && (dataset.getLastExportTime() == null || dataset.getLastExportTime().before(publicationDate))) { @@ -125,7 +129,9 @@ public void updateOaiRecords(String setName, List datasetIds, Date updateT } // anything left in the map should be marked as removed! 
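Editorial aside: the "map diff" that the comment above describes (build a map of the set's existing records, drop every dataset that is still a candidate, mark whatever is left as removed) can be illustrated with this minimal, self-contained sketch; the record type and the ids are simplified stand-ins, not the actual OAIRecord entity.

```java
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class OaiSetDiffSketch {

    // Simplified stand-in for the OAIRecord entity.
    record OaiRecord(String globalId, boolean removed) {}

    public static void main(String[] args) {
        // Existing OAI records for the set, keyed by global id:
        Map<String, OaiRecord> recordMap = new HashMap<>();
        recordMap.put("doi:10.5072/FK2/AAAAAA", new OaiRecord("doi:10.5072/FK2/AAAAAA", false));
        recordMap.put("doi:10.5072/FK2/BBBBBB", new OaiRecord("doi:10.5072/FK2/BBBBBB", false));

        // Datasets that are still candidates for the set:
        List<String> candidates = List.of("doi:10.5072/FK2/AAAAAA");

        // Every candidate that is processed is taken out of the map...
        candidates.forEach(recordMap::remove);

        // ...so whatever is left has dropped out of the set and gets marked as
        // removed (subject to the "confirmed" double-check described above).
        recordMap.replaceAll((id, rec) -> new OaiRecord(id, true));
        System.out.println("Marked as removed: " + recordMap.keySet());
    }
}
```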
- markOaiRecordsAsRemoved( recordMap.values(), updateTime, setUpdateLogger); + markOaiRecordsAsRemoved(recordMap.values(), updateTime, confirmed, setUpdateLogger); + + } @@ -162,7 +168,7 @@ record = new OAIRecord(setName, dataset.getGlobalId().asString(), new Date()); } } - + /* // Updates any existing OAI records for this dataset // Should be called whenever there's a change in the release status of the Dataset // (i.e., when it's published or deaccessioned), so that the timestamps and @@ -201,13 +207,31 @@ public void updateOaiRecordsForDataset(Dataset dataset) { logger.fine("Null returned - no records found."); } } +*/ - public void markOaiRecordsAsRemoved(Collection records, Date updateTime, Logger setUpdateLogger) { + public void markOaiRecordsAsRemoved(Collection records, Date updateTime, boolean confirmed, Logger setUpdateLogger) { for (OAIRecord oaiRecord : records) { if ( !oaiRecord.isRemoved() ) { - setUpdateLogger.fine("marking OAI record "+oaiRecord.getGlobalId()+" as removed"); - oaiRecord.setRemoved(true); - oaiRecord.setLastUpdateTime(updateTime); + boolean confirmedRemoved = confirmed; + if (!confirmedRemoved) { + Dataset lookedUp = datasetService.findByGlobalId(oaiRecord.getGlobalId()); + if (lookedUp == null) { + confirmedRemoved = true; + } else if (lookedUp.getLastExportTime() == null) { + confirmedRemoved = true; + } else { + boolean isReleased = lookedUp.getReleasedVersion() != null; + if (!isReleased) { + confirmedRemoved = true; + } + } + } + + if (confirmedRemoved) { + setUpdateLogger.fine("marking OAI record "+oaiRecord.getGlobalId()+" as removed"); + oaiRecord.setRemoved(true); + oaiRecord.setLastUpdateTime(updateTime); + } } else { setUpdateLogger.fine("OAI record "+oaiRecord.getGlobalId()+" is already marked as removed."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java index 2bd666401c7..b3a09391bf3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java @@ -25,6 +25,7 @@ import jakarta.inject.Named; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.BaseHttpSolrClient.RemoteSolrException; @@ -121,6 +122,25 @@ public List findAllNamedSets() { } } + /** + * "Active" sets are the ones that have been successfully exported, and contain + * a non-zero number of records. (Although a set that contains a number of + * records that are all marked as "deleted" is still an active set!) 
+ * @return list of OAISets + */ + public List findAllActiveNamedSets() { + String jpaQueryString = "select object(o) " + + "from OAISet as o, OAIRecord as r " + + "where r.setName = o.spec " + + "and o.spec != '' " + + "group by o order by o.spec"; + + Query query = em.createQuery(jpaQueryString); + List queryResults = query.getResultList(); + + return queryResults; + } + @Asynchronous public void remove(Long setId) { OAISet oaiSet = find(setId); @@ -151,6 +171,8 @@ public void exportOaiSet(OAISet oaiSet, Logger exportLogger) { String query = managedSet.getDefinition(); List datasetIds; + boolean databaseLookup = false; // As opposed to a search engine lookup + try { if (!oaiSet.isDefaultSet()) { datasetIds = expandSetQuery(query); @@ -161,6 +183,7 @@ public void exportOaiSet(OAISet oaiSet, Logger exportLogger) { // including the unpublished drafts and deaccessioned ones. // Those will be filtered out further down the line. datasetIds = datasetService.findAllLocalDatasetIds(); + databaseLookup = true; } } catch (OaiSetException ose) { datasetIds = null; @@ -171,7 +194,7 @@ public void exportOaiSet(OAISet oaiSet, Logger exportLogger) { // they will be properly marked as "deleted"! -- L.A. 4.5 //if (datasetIds != null && !datasetIds.isEmpty()) { exportLogger.info("Calling OAI Record Service to re-export " + datasetIds.size() + " datasets."); - oaiRecordService.updateOaiRecords(managedSet.getSpec(), datasetIds, new Date(), true, exportLogger); + oaiRecordService.updateOaiRecords(managedSet.getSpec(), datasetIds, new Date(), true, databaseLookup, exportLogger); //} managedSet.setUpdateInProgress(false); diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 96a19acc0e8..233ca94f5fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -31,6 +31,7 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import io.gdcc.xoai.exceptions.BadVerbException; import io.gdcc.xoai.exceptions.OAIException; import io.gdcc.xoai.model.oaipmh.Granularity; import io.gdcc.xoai.services.impl.SimpleResumptionTokenFormat; @@ -48,6 +49,7 @@ import jakarta.servlet.http.HttpServlet; import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; +import java.util.Map; import javax.xml.stream.XMLStreamException; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; @@ -256,10 +258,16 @@ private void processRequest(HttpServletRequest httpServletRequest, HttpServletRe "Sorry. 
OAI Service is disabled on this Dataverse node."); return; } - - RawRequest rawRequest = RequestBuilder.buildRawRequest(httpServletRequest.getParameterMap()); - - OAIPMH handle = dataProvider.handle(rawRequest); + + Map params = httpServletRequest.getParameterMap(); + OAIPMH handle; + try { + RawRequest rawRequest = RequestBuilder.buildRawRequest(params); + handle = dataProvider.handle(rawRequest); + } catch (BadVerbException bve) { + handle = dataProvider.handle(params); + } + response.setContentType("text/xml;charset=UTF-8"); try (XmlWriter xmlWriter = new XmlWriter(response.getOutputStream(), repositoryConfiguration);) { diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java index b4e275b6059..1e713b08adb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java @@ -35,7 +35,7 @@ public void setSetService(OAISetServiceBean setService) { @Override public boolean supportSets() { - List dataverseOAISets = setService.findAllNamedSets(); + List dataverseOAISets = setService.findAllActiveNamedSets(); if (dataverseOAISets == null || dataverseOAISets.isEmpty()) { return false; @@ -46,7 +46,7 @@ public boolean supportSets() { @Override public List getSets() { logger.fine("calling retrieveSets()"); - List dataverseOAISets = setService.findAllNamedSets(); + List dataverseOAISets = setService.findAllActiveNamedSets(); List XOAISets = new ArrayList(); if (dataverseOAISets != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 40dc3d6fdd6..9bacafd173f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -48,6 +48,8 @@ import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; import edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator; +import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; +import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.datavariable.SummaryStatistic; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataExtractor; @@ -70,7 +72,10 @@ import edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por.PORFileReader; import edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por.PORFileReaderSpi; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import edu.harvard.iq.dataverse.util.*; +import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import org.apache.commons.io.IOUtils; //import edu.harvard.iq.dvn.unf.*; @@ -121,6 +126,7 @@ import jakarta.jms.Message; import jakarta.faces.application.FacesMessage; import jakarta.ws.rs.core.MediaType; +import java.text.MessageFormat; import ucar.nc2.NetcdfFile; import ucar.nc2.NetcdfFiles; @@ -146,6 +152,8 @@ public class IngestServiceBean { @EJB AuxiliaryFileServiceBean auxiliaryFileService; @EJB + StorageUseServiceBean storageUseService; + @EJB 
SystemConfig systemConfig; @Resource(lookup = "java:app/jms/queue/ingest") @@ -158,7 +166,8 @@ public class IngestServiceBean { private static String dateTimeFormat_ymdhmsS = "yyyy-MM-dd HH:mm:ss.SSS"; private static String dateFormat_ymd = "yyyy-MM-dd"; - // This method tries to permanently store new files on the filesystem. + // This method tries to permanently store new files in storage (on the filesystem, + // in an S3 bucket, etc.). // Then it adds the files that *have been successfully saved* to the // dataset (by attaching the DataFiles to the Dataset, and the corresponding // FileMetadatas to the DatasetVersion). It also tries to ensure that none @@ -167,282 +176,386 @@ public class IngestServiceBean { // DataFileCategory objects, if any were already assigned to the files). // It must be called before we attempt to permanently save the files in // the database by calling the Save command on the dataset and/or version. + + // !! There is way too much going on in this method. :( !! + + // @todo: Is this method a good candidate for turning into a dedicated Command? public List saveAndAddFilesToDataset(DatasetVersion version, - List newFiles, - DataFile fileToReplace, - boolean tabIngest) { - List ret = new ArrayList<>(); - - if (newFiles != null && newFiles.size() > 0) { - // ret = new ArrayList<>(); - // final check for duplicate file names; - // we tried to make the file names unique on upload, but then - // the user may have edited them on the "add files" page, and - // renamed FOOBAR-1.txt back to FOOBAR.txt... + List newFiles, + DataFile fileToReplace, + boolean tabIngest) { + UploadSessionQuotaLimit uploadSessionQuota = null; + List ret = new ArrayList<>(); + + if (newFiles != null && newFiles.size() > 0) { + // ret = new ArrayList<>(); + // final check for duplicate file names; + // we tried to make the file names unique on upload, but then + // the user may have edited them on the "add files" page, and + // renamed FOOBAR-1.txt back to FOOBAR.txt... 
IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles, fileToReplace); - Dataset dataset = version.getDataset(); - - for (DataFile dataFile : newFiles) { - boolean unattached = false; - boolean savedSuccess = false; - if (dataFile.getOwner() == null) { - unattached = true; - dataFile.setOwner(dataset); - } + Dataset dataset = version.getDataset(); + long totalBytesSaved = 0L; - String[] storageInfo = DataAccess.getDriverIdAndStorageLocation(dataFile.getStorageIdentifier()); - String driverType = DataAccess.getDriverType(storageInfo[0]); - String storageLocation = storageInfo[1]; - String tempFileLocation = null; - Path tempLocationPath = null; - if (driverType.equals("tmp")) { //"tmp" is the default if no prefix or the "tmp://" driver - tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + storageLocation; - - // Try to save the file in its permanent location: - tempLocationPath = Paths.get(tempFileLocation); - WritableByteChannel writeChannel = null; - FileChannel readChannel = null; - - StorageIO dataAccess = null; - - try { - logger.fine("Attempting to create a new storageIO object for " + storageLocation); - dataAccess = DataAccess.createNewStorageIO(dataFile, storageLocation); - - logger.fine("Successfully created a new storageIO object."); - /* - * This commented-out code demonstrates how to copy bytes from a local - * InputStream (or a readChannel) into the writable byte channel of a Dataverse - * DataAccessIO object: - */ - - /* - * storageIO.open(DataAccessOption.WRITE_ACCESS); - * - * writeChannel = storageIO.getWriteChannel(); readChannel = new - * FileInputStream(tempLocationPath.toFile()).getChannel(); - * - * long bytesPerIteration = 16 * 1024; // 16K bytes long start = 0; while ( - * start < readChannel.size() ) { readChannel.transferTo(start, - * bytesPerIteration, writeChannel); start += bytesPerIteration; } - */ - - /* - * But it's easier to use this convenience method from the DataAccessIO: - * - * (if the underlying storage method for this file is local filesystem, the - * DataAccessIO will simply copy the file using Files.copy, like this: - * - * Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), - * StandardCopyOption.REPLACE_EXISTING); - */ - dataAccess.savePath(tempLocationPath); - - // Set filesize in bytes - // - dataFile.setFilesize(dataAccess.getSize()); - savedSuccess = true; - logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); - - // TODO: reformat this file to remove the many tabs added in cc08330 - extractMetadataNcml(dataFile, tempLocationPath); - - } catch (IOException ioex) { - logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); - } finally { - if (readChannel != null) { - try { - readChannel.close(); - } catch (IOException e) { - } - } - if (writeChannel != null) { - try { - writeChannel.close(); - } catch (IOException e) { - } - } - } + if (systemConfig.isStorageQuotasEnforced()) { + // Check if this dataset is subject to any storage quotas: + uploadSessionQuota = fileService.getUploadSessionQuotaLimit(dataset); + } + + for (DataFile dataFile : newFiles) { + boolean unattached = false; + boolean savedSuccess = false; + if (dataFile.getOwner() == null) { + // is it ever "attached"? + // do we ever call this method with dataFile.getOwner() != null? + // - we really shouldn't be, either. 
+ unattached = true; + dataFile.setOwner(dataset); + } + + String[] storageInfo = DataAccess.getDriverIdAndStorageLocation(dataFile.getStorageIdentifier()); + String driverType = DataAccess.getDriverType(storageInfo[0]); + String storageLocation = storageInfo[1]; + String tempFileLocation = null; + Path tempLocationPath = null; + long confirmedFileSize = 0L; + if (driverType.equals("tmp")) { //"tmp" is the default if no prefix or the "tmp://" driver + tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + storageLocation; + + // Try to save the file in its permanent location: + tempLocationPath = Paths.get(tempFileLocation); + WritableByteChannel writeChannel = null; + FileChannel readChannel = null; + + StorageIO dataAccess = null; + + try { + logger.fine("Attempting to create a new storageIO object for " + storageLocation); + dataAccess = DataAccess.createNewStorageIO(dataFile, storageLocation); + + logger.fine("Successfully created a new storageIO object."); + /** + * This commented-out code demonstrates how to copy + * bytes from a local InputStream (or a readChannel) + * into the writable byte channel of a Dataverse + * DataAccessIO object: + */ + + /** + * storageIO.open(DataAccessOption.WRITE_ACCESS); + * + * writeChannel = storageIO.getWriteChannel(); + * readChannel = new + * FileInputStream(tempLocationPath.toFile()).getChannel(); + * + * long bytesPerIteration = 16 * 1024; // 16K bytes long + * start = 0; + * while ( start < readChannel.size() ) { + * readChannel.transferTo(start, bytesPerIteration, writeChannel); start += bytesPerIteration; + * } + */ + + /** + * But it's easier to use this convenience method from + * the DataAccessIO: + * + * (if the underlying storage method for this file is + * local filesystem, the DataAccessIO will simply copy + * the file using Files.copy, like this: + * + * Files.copy(tempLocationPath, + * storageIO.getFileSystemLocation(), + * StandardCopyOption.REPLACE_EXISTING); + */ + dataAccess.savePath(tempLocationPath); + + // Set filesize in bytes + // + confirmedFileSize = dataAccess.getSize(); + dataFile.setFilesize(confirmedFileSize); + savedSuccess = true; + logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); + + // TODO: reformat this file to remove the many tabs added in cc08330 - done, I think? + extractMetadataNcml(dataFile, tempLocationPath); + + } catch (IOException ioex) { + logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); + } finally { + if (readChannel != null) { + try { + readChannel.close(); + } catch (IOException e) { + } + } + if (writeChannel != null) { + try { + writeChannel.close(); + } catch (IOException e) { + } + } + } // Since we may have already spent some CPU cycles scaling down image thumbnails, - // we may as well save them, by moving these generated images to the permanent - // dataset directory. We should also remember to delete any such files in the - // temp directory: - List generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), - storageLocation); - if (generatedTempFiles != null) { - for (Path generated : generatedTempFiles) { - if (savedSuccess) { // no need to try to save this aux file permanently, if we've failed to - // save the main file! 
- logger.fine("(Will also try to permanently save generated thumbnail file " - + generated.toString() + ")"); - try { - // Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), - // generated.getFileName().toString())); - int i = generated.toString().lastIndexOf("thumb"); - if (i > 1) { - String extensionTag = generated.toString().substring(i); - dataAccess.savePathAsAux(generated, extensionTag); - logger.fine( - "Saved generated thumbnail as aux object. \"preview available\" status: " - + dataFile.isPreviewImageAvailable()); - } else { - logger.warning( - "Generated thumbnail file name does not match the expected pattern: " - + generated.toString()); - } - - } catch (IOException ioex) { - logger.warning("Failed to save generated file " + generated.toString()); - } - } - - // ... but we definitely want to delete it: - try { - Files.delete(generated); - } catch (IOException ioex) { - logger.warning("Failed to delete generated file " + generated.toString()); - } - } - } - // Any necessary post-processing: - // performPostProcessingTasks(dataFile); - } else { - try { - StorageIO dataAccess = DataAccess.getStorageIO(dataFile); - //Populate metadata - dataAccess.open(DataAccessOption.READ_ACCESS); - //set file size - logger.fine("Setting file size: " + dataAccess.getSize()); - dataFile.setFilesize(dataAccess.getSize()); - if(dataAccess instanceof S3AccessIO) { - ((S3AccessIO)dataAccess).removeTempTag(); - } - } catch (IOException ioex) { - logger.warning("Failed to get file size, storage id " + dataFile.getStorageIdentifier() + " (" - + ioex.getMessage() + ")"); - } - savedSuccess = true; - } + // we may as well save them, by moving these generated images to the permanent + // dataset directory. We should also remember to delete any such files in the + // temp directory: + List generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), + storageLocation); + if (generatedTempFiles != null) { + for (Path generated : generatedTempFiles) { + if (savedSuccess) { // no need to try to save this aux file permanently, if we've failed to + // save the main file! + logger.fine("(Will also try to permanently save generated thumbnail file " + + generated.toString() + ")"); + try { + // Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), + // generated.getFileName().toString())); + int i = generated.toString().lastIndexOf("thumb"); + if (i > 1) { + String extensionTag = generated.toString().substring(i); + dataAccess.savePathAsAux(generated, extensionTag); + logger.fine( + "Saved generated thumbnail as aux object. \"preview available\" status: " + + dataFile.isPreviewImageAvailable()); + } else { + logger.warning( + "Generated thumbnail file name does not match the expected pattern: " + + generated.toString()); + } - logger.fine("Done! Finished saving new files in permanent storage and adding them to the dataset."); - boolean belowLimit = false; - - try { - //getting StorageIO may require knowing the owner (so this must come before owner is potentially set back to null - belowLimit = dataFile.getStorageIO().isBelowIngestSizeLimit(); - } catch (IOException e) { - logger.warning("Error getting ingest limit for file: " + dataFile.getIdentifier() + " : " + e.getMessage()); - } - - if (savedSuccess && belowLimit) { - // These are all brand new files, so they should all have - // one filemetadata total. -- L.A. 
- FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0); - String fileName = fileMetadata.getLabel(); - - boolean metadataExtracted = false; - boolean metadataExtractedFromNetcdf = false; - if (tabIngest && FileUtil.canIngestAsTabular(dataFile)) { - /* - * Note that we don't try to ingest the file right away - instead we mark it as - * "scheduled for ingest", then at the end of the save process it will be queued - * for async. ingest in the background. In the meantime, the file will be - * ingested as a regular, non-tabular file, and appear as such to the user, - * until the ingest job is finished with the Ingest Service. - */ - dataFile.SetIngestScheduled(); - } else if (fileMetadataExtractable(dataFile)) { - - try { - // FITS is the only type supported for metadata - // extraction, as of now. -- L.A. 4.0 - // Note that extractMetadataNcml() is used for NetCDF/HDF5. - dataFile.setContentType("application/fits"); - metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); - } catch (IOException mex) { - logger.severe("Caught exception trying to extract indexable metadata from file " - + fileName + ", " + mex.getMessage()); - } - if (metadataExtracted) { - logger.fine("Successfully extracted indexable metadata from file " + fileName); - } else { - logger.fine("Failed to extract indexable metadata from file " + fileName); - } - } else if (fileMetadataExtractableFromNetcdf(dataFile, tempLocationPath)) { - try { - logger.fine("trying to extract metadata from netcdf"); - metadataExtractedFromNetcdf = extractMetadataFromNetcdf(tempFileLocation, dataFile, version); - } catch (IOException ex) { - logger.fine("could not extract metadata from netcdf: " + ex); - } - if (metadataExtractedFromNetcdf) { - logger.fine("Successfully extracted indexable metadata from netcdf file " + fileName); - } else { - logger.fine("Failed to extract indexable metadata from netcdf file " + fileName); - } + } catch (IOException ioex) { + logger.warning("Failed to save generated file " + generated.toString()); + } + } - } else if (FileUtil.MIME_TYPE_INGESTED_FILE.equals(dataFile.getContentType())) { + // ... but we definitely want to delete it: + try { + Files.delete(generated); + } catch (IOException ioex) { + logger.warning("Failed to delete generated file " + generated.toString()); + } + } + } + // Any necessary post-processing: + // performPostProcessingTasks(dataFile); + } else { + // This is a direct upload + try { + StorageIO dataAccess = DataAccess.getStorageIO(dataFile); + //Populate metadata + dataAccess.open(DataAccessOption.READ_ACCESS); + + confirmedFileSize = dataAccess.getSize(); + + // For directly-uploaded files, we will perform the file size + // limit and quota checks here. Perform them *again*, in + // some cases: a directly uploaded files have already been + // checked (for the sake of being able to reject the upload + // before the user clicks "save"). But in case of direct + // uploads via API, these checks haven't been performed yet, + // so, here's our chance. 
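The comment above boils down to two tests applied to a directly-uploaded file at save time: the per-store size limit first, then the remaining allowance of the upload session's quota. A minimal sketch of that decision, assuming a simplified quota holder (SimpleQuota and checkUploadAllowed are hypothetical names, not the actual Dataverse UploadSessionQuotaLimit/SystemConfig APIs):

// Illustrative sketch only - a distilled version of the size-limit / quota
// decision for one directly-uploaded file. SimpleQuota and checkUploadAllowed
// are hypothetical names, not the actual Dataverse classes.
final class SimpleQuota {
    private final long allocation;
    private long totalUsage;

    SimpleQuota(long allocation, long totalUsage) {
        this.allocation = allocation;
        this.totalUsage = totalUsage;
    }

    long remaining() { return Math.max(0L, allocation - totalUsage); }

    void add(long bytes) { totalUsage += bytes; }
}

final class UploadCheckSketch {
    // Returns true if the file may be saved: per-store size limit first,
    // then the remaining allowance of the (optional) session quota.
    static boolean checkUploadAllowed(long confirmedFileSize, Long fileSizeLimit, SimpleQuota quota) {
        if (fileSizeLimit != null && confirmedFileSize >= fileSizeLimit) {
            return false; // over the per-store upload limit
        }
        if (quota != null) {
            if (confirmedFileSize > quota.remaining()) {
                return false; // would exceed the storage quota
            }
            quota.add(confirmedFileSize); // adjust the in-memory session quota
        }
        return true;
    }

    public static void main(String[] args) {
        SimpleQuota quota = new SimpleQuota(10_000_000L, 9_500_000L);
        System.out.println(checkUploadAllowed(400_000L, 1_000_000L, quota)); // true
        System.out.println(checkUploadAllowed(600_000L, 1_000_000L, quota)); // false - quota used up
    }
}

As in the code that follows, the size limit rejects the file outright, while the quota check only rejects it when it would not fit in the remaining bytes; the session quota is then adjusted in memory rather than re-queried for every file.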
+ + Long fileSizeLimit = systemConfig.getMaxFileUploadSizeForStore(version.getDataset().getEffectiveStorageDriverId()); + + if (fileSizeLimit == null || confirmedFileSize < fileSizeLimit) { + + //set file size + logger.fine("Setting file size: " + confirmedFileSize); + dataFile.setFilesize(confirmedFileSize); + + if (dataAccess instanceof S3AccessIO) { + ((S3AccessIO) dataAccess).removeTempTag(); + } + savedSuccess = true; + } + } catch (IOException ioex) { + logger.warning("Failed to get file size, storage id, or failed to remove the temp tag on the saved S3 object" + dataFile.getStorageIdentifier() + " (" + + ioex.getMessage() + ")"); + } + } + + // If quotas are enforced, we will perform a quota check here. + // If this is an upload via the UI, we must have already + // performed this check once. But it is possible that somebody + // else may have added more data to the same collection/dataset + // etc., before this user was ready to click "save", so this is + // necessary. For other cases, such as the direct uploads via + // the API, this is the single point in the workflow where + // storage quotas are enforced. + + if (savedSuccess) { + if (uploadSessionQuota != null) { + // It may be worth considering refreshing the quota here, + // and incrementing the Storage Use record for + // all the parent objects in real time, as + // *each* individual file is being saved. I experimented + // with that, but decided against it for performance + // reasons. But yes, there may be some edge case where + // parallel multi-file uploads can end up being able + // to save 2X worth the quota that was available at the + // beginning of each session. + if (confirmedFileSize > uploadSessionQuota.getRemainingQuotaInBytes()) { + savedSuccess = false; + logger.warning("file size over quota limit, skipping"); + // @todo: we need to figure out how to better communicate + // this (potentially partial) failure to the user. + //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(confirmedFileSize), bytesToHumanReadable(storageQuotaLimit))); + } else { + // Adjust quota: + logger.info("Setting total usage in bytes to " + (uploadSessionQuota.getTotalUsageInBytes() + confirmedFileSize)); + uploadSessionQuota.setTotalUsageInBytes(uploadSessionQuota.getTotalUsageInBytes() + confirmedFileSize); + } + } + + // ... unless we had to reject the file just now because of + // the quota limits, count the number of bytes saved for the + // purposes of incrementing the total storage of the parent + // DvObjectContainers: + + if (savedSuccess) { + totalBytesSaved += confirmedFileSize; + } + } + + logger.fine("Done! Finished saving new file in permanent storage and adding it to the dataset."); + boolean belowLimit = false; + + try { + //getting StorageIO may require knowing the owner (so this must come before owner is potentially set back to null + belowLimit = dataFile.getStorageIO().isBelowIngestSizeLimit(); + } catch (IOException e) { + logger.warning("Error getting ingest limit for file: " + dataFile.getIdentifier() + " : " + e.getMessage()); + } + + if (savedSuccess && belowLimit) { + // These are all brand new files, so they should all have + // one filemetadata total. -- L.A. 
+ FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0); + String fileName = fileMetadata.getLabel(); + + boolean metadataExtracted = false; + boolean metadataExtractedFromNetcdf = false; + if (tabIngest && FileUtil.canIngestAsTabular(dataFile)) { + /** + * Note that we don't try to ingest the file right away + * - instead we mark it as "scheduled for ingest", then + * at the end of the save process it will be queued for + * async. ingest in the background. In the meantime, the + * file will be ingested as a regular, non-tabular file, + * and appear as such to the user, until the ingest job + * is finished with the Ingest Service. + */ + dataFile.SetIngestScheduled(); + } else if (fileMetadataExtractable(dataFile)) { + + try { + // FITS is the only type supported for metadata + // extraction, as of now. -- L.A. 4.0 + // Note that extractMetadataNcml() is used for NetCDF/HDF5. + dataFile.setContentType("application/fits"); + metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); + } catch (IOException mex) { + logger.severe("Caught exception trying to extract indexable metadata from file " + + fileName + ", " + mex.getMessage()); + } + if (metadataExtracted) { + logger.fine("Successfully extracted indexable metadata from file " + fileName); + } else { + logger.fine("Failed to extract indexable metadata from file " + fileName); + } + } else if (fileMetadataExtractableFromNetcdf(dataFile, tempLocationPath)) { + try { + logger.fine("trying to extract metadata from netcdf"); + metadataExtractedFromNetcdf = extractMetadataFromNetcdf(tempFileLocation, dataFile, version); + } catch (IOException ex) { + logger.fine("could not extract metadata from netcdf: " + ex); + } + if (metadataExtractedFromNetcdf) { + logger.fine("Successfully extracted indexable metadata from netcdf file " + fileName); + } else { + logger.fine("Failed to extract indexable metadata from netcdf file " + fileName); + } + + } else if (FileUtil.MIME_TYPE_INGESTED_FILE.equals(dataFile.getContentType())) { // Make sure no *uningested* tab-delimited files are saved with the type "text/tab-separated-values"! // "text/tsv" should be used instead: dataFile.setContentType(FileUtil.MIME_TYPE_TSV); } - } + } if (unattached) { dataFile.setOwner(null); } - // ... and let's delete the main temp file if it exists: - if(tempLocationPath!=null) { - try { - logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString()); - Files.delete(tempLocationPath); - } catch (IOException ex) { - // (non-fatal - it's just a temp file.) 
- logger.warning("Failed to delete temp file " + tempLocationPath.toString()); - } - } - if (savedSuccess) { - // temp dbug line - // System.out.println("ADDING FILE: " + fileName + "; for dataset: " + - // dataset.getGlobalId()); - // Make sure the file is attached to the dataset and to the version, if this - // hasn't been done yet: - if (dataFile.getOwner() == null) { - dataFile.setOwner(dataset); - - version.getFileMetadatas().add(dataFile.getFileMetadata()); - dataFile.getFileMetadata().setDatasetVersion(version); - dataset.getFiles().add(dataFile); - - if (dataFile.getFileMetadata().getCategories() != null) { - ListIterator dfcIt = dataFile.getFileMetadata().getCategories() - .listIterator(); - - while (dfcIt.hasNext()) { - DataFileCategory dataFileCategory = dfcIt.next(); - - if (dataFileCategory.getDataset() == null) { - DataFileCategory newCategory = dataset - .getCategoryByName(dataFileCategory.getName()); - if (newCategory != null) { - newCategory.addFileMetadata(dataFile.getFileMetadata()); - // dataFileCategory = newCategory; - dfcIt.set(newCategory); - } else { - dfcIt.remove(); - } - } - } - } - } - } + // ... and let's delete the main temp file if it exists: + if (tempLocationPath != null) { + try { + logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString()); + Files.delete(tempLocationPath); + } catch (IOException ex) { + // (non-fatal - it's just a temp file.) + logger.warning("Failed to delete temp file " + tempLocationPath.toString()); + } + } + if (savedSuccess) { + // temp dbug line + // System.out.println("ADDING FILE: " + fileName + "; for dataset: " + + // dataset.getGlobalId()); + // Make sure the file is attached to the dataset and to the version, if this + // hasn't been done yet: + // @todo: but shouldn't we be doing the reverse if we haven't been + // able to save the file? - disconnect it from the dataset and + // the version?? - L.A. 2023 + // (that said, is there *ever* a case where dataFile.getOwner() != null ?) + if (dataFile.getOwner() == null) { + dataFile.setOwner(dataset); + + version.getFileMetadatas().add(dataFile.getFileMetadata()); + dataFile.getFileMetadata().setDatasetVersion(version); + dataset.getFiles().add(dataFile); + + if (dataFile.getFileMetadata().getCategories() != null) { + ListIterator dfcIt = dataFile.getFileMetadata().getCategories() + .listIterator(); + + while (dfcIt.hasNext()) { + DataFileCategory dataFileCategory = dfcIt.next(); + + if (dataFileCategory.getDataset() == null) { + DataFileCategory newCategory = dataset.getCategoryByName(dataFileCategory.getName()); + if (newCategory != null) { + newCategory.addFileMetadata(dataFile.getFileMetadata()); + // dataFileCategory = newCategory; + dfcIt.set(newCategory); + } else { + dfcIt.remove(); + } + } + } + } + } + + // Hmm. Noticing that the following two things - adding the + // files to the return list were being + // done outside of this "if (savedSuccess)" block. I'm pretty + // sure that was wrong. - L.A. 11-30-2023 + ret.add(dataFile); + // (unless that is that return value isn't used for anything - ?) + } - ret.add(dataFile); - } - } + } + // Update storage use for all the parent dvobjects: + logger.info("Incrementing recorded storage use by " + totalBytesSaved + " bytes for dataset " + dataset.getId()); + // Q. Need to consider what happens when this code is called on Create? + // A. It works on create as well, yes. (the recursive increment + // query in the method below does need the parent dataset to + // have the database id. 
But even if these files have been + // uploaded on the Create form, we first save the dataset, and + // then add the files to it. - L.A. + storageUseService.incrementStorageSizeRecursively(dataset.getId(), totalBytesSaved); + } - return ret; - } + return ret; + } public List listGeneratedTempFiles(Path tempDirectory, String baseName) { List generatedFiles = new ArrayList<>(); @@ -613,27 +726,17 @@ public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFil } public void produceContinuousSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { - - /* - // quick, but memory-inefficient way: - // - this method just loads the entire file-worth of continuous vectors - // into a Double[][] matrix. - //Double[][] variableVectors = subsetContinuousVectors(dataFile); - //calculateContinuousSummaryStatistics(dataFile, variableVectors); - - // A more sophisticated way: this subsets one column at a time, using - // the new optimized subsetting that does not have to read any extra - // bytes from the file to extract the column: - - TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator(); - */ for (int i = 0; i < dataFile.getDataTable().getVarQuantity(); i++) { if (dataFile.getDataTable().getDataVariables().get(i).isIntervalContinuous()) { logger.fine("subsetting continuous vector"); if ("float".equals(dataFile.getDataTable().getDataVariables().get(i).getFormat())) { - Float[] variableVector = TabularSubsetGenerator.subsetFloatVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Float[] variableVector = TabularSubsetGenerator.subsetFloatVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); logger.fine("Calculating summary statistics on a Float vector;"); calculateContinuousSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: @@ -641,7 +744,11 @@ public void produceContinuousSummaryStatistics(DataFile dataFile, File generated calculateUNF(dataFile, i, variableVector); variableVector = null; } else { - Double[] variableVector = TabularSubsetGenerator.subsetDoubleVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Double[] variableVector = TabularSubsetGenerator.subsetDoubleVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); logger.fine("Calculating summary statistics on a Double vector;"); calculateContinuousSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: @@ -663,7 +770,11 @@ public void produceDiscreteNumericSummaryStatistics(DataFile dataFile, File gene && dataFile.getDataTable().getDataVariables().get(i).isTypeNumeric()) { logger.fine("subsetting discrete-numeric vector"); - Long[] variableVector = TabularSubsetGenerator.subsetLongVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Long[] variableVector = TabularSubsetGenerator.subsetLongVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); // We are discussing calculating the same summary stats for // all numerics (the same kind of sumstats that we've been calculating // for numeric continuous 
type) -- L.A. Jul. 2014 @@ -697,7 +808,11 @@ public void produceCharacterSummaryStatistics(DataFile dataFile, File generatedT if (dataFile.getDataTable().getDataVariables().get(i).isTypeCharacter()) { logger.fine("subsetting character vector"); - String[] variableVector = TabularSubsetGenerator.subsetStringVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + String[] variableVector = TabularSubsetGenerator.subsetStringVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); //calculateCharacterSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: logger.fine("Calculating UNF on a String vector"); @@ -715,20 +830,29 @@ public static void produceFrequencyStatistics(DataFile dataFile, File generatedT produceFrequencies(generatedTabularFile, vars); } - public static void produceFrequencies( File generatedTabularFile, List vars) throws IOException { + public static void produceFrequencies(File generatedTabularFile, List vars) throws IOException { for (int i = 0; i < vars.size(); i++) { Collection cats = vars.get(i).getCategories(); int caseQuantity = vars.get(i).getDataTable().getCaseQuantity().intValue(); boolean isNumeric = vars.get(i).isTypeNumeric(); + boolean skipVariableHeaderLine = vars.get(i).getDataTable().isStoredWithVariableHeader(); Object[] variableVector = null; if (cats.size() > 0) { if (isNumeric) { - variableVector = TabularSubsetGenerator.subsetFloatVector(new FileInputStream(generatedTabularFile), i, caseQuantity); + variableVector = TabularSubsetGenerator.subsetFloatVector( + new FileInputStream(generatedTabularFile), + i, + caseQuantity, + skipVariableHeaderLine); } else { - variableVector = TabularSubsetGenerator.subsetStringVector(new FileInputStream(generatedTabularFile), i, caseQuantity); + variableVector = TabularSubsetGenerator.subsetStringVector( + new FileInputStream(generatedTabularFile), + i, + caseQuantity, + skipVariableHeaderLine); } if (variableVector != null) { Hashtable freq = calculateFrequency(variableVector); @@ -810,6 +934,7 @@ public boolean ingestAsTabular(Long datafile_id) { DataFile dataFile = fileService.find(datafile_id); boolean ingestSuccessful = false; boolean forceTypeCheck = false; + boolean storingWithVariableHeader = systemConfig.isStoringIngestedFilesWithHeaders(); // Never attempt to ingest a file that's already ingested! 
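The storingWithVariableHeader flag introduced here corresponds to the extra boolean argument threaded through the subsetFloatVector/subsetDoubleVector/subsetLongVector/subsetStringVector calls above: when an ingested tab-delimited file is stored with a first line of variable names, any code reading it column-by-column has to discard that line before counting data rows. A minimal sketch of that pattern, assuming a hypothetical readColumn() helper (this is not the real TabularSubsetGenerator implementation):

// Illustrative sketch only - the "optionally skip the variable-name header line"
// pattern that the new boolean argument represents. readColumn() is a
// hypothetical helper, not the real TabularSubsetGenerator API.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

final class ColumnReaderSketch {
    static String[] readColumn(InputStream in, int column, int caseQuantity,
                               boolean storedWithVariableHeader) throws IOException {
        String[] values = new String[caseQuantity];
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            if (storedWithVariableHeader) {
                reader.readLine(); // discard the header line before counting data rows
            }
            for (int row = 0; row < caseQuantity; row++) {
                String line = reader.readLine();
                if (line == null) {
                    throw new IOException("unexpected end of tab-delimited file at row " + row);
                }
                values[row] = line.split("\t", -1)[column];
            }
        }
        return values;
    }
}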
if (dataFile.isTabularData()) { @@ -911,11 +1036,7 @@ public boolean ingestAsTabular(Long datafile_id) { TabularDataIngest tabDataIngest = null; try { - if (additionalData != null) { - tabDataIngest = ingestPlugin.read(inputStream, additionalData); - } else { - tabDataIngest = ingestPlugin.read(inputStream, null); - } + tabDataIngest = ingestPlugin.read(inputStream, storingWithVariableHeader, additionalData); } catch (IOException ingestEx) { dataFile.SetIngestProblem(); FileUtil.createIngestFailureReport(dataFile, ingestEx.getMessage()); @@ -968,6 +1089,7 @@ public boolean ingestAsTabular(Long datafile_id) { dataFile.setDataTable(tabDataIngest.getDataTable()); tabDataIngest.getDataTable().setDataFile(dataFile); tabDataIngest.getDataTable().setOriginalFileName(originalFileName); + dataFile.getDataTable().setStoredWithVariableHeader(storingWithVariableHeader); try { produceSummaryStatistics(dataFile, tabFile); @@ -1030,7 +1152,14 @@ public boolean ingestAsTabular(Long datafile_id) { } } - if (!databaseSaveSuccessful) { + if (databaseSaveSuccessful) { + // Add the size of the tab-delimited version of the data file + // that we have produced and stored to the recorded storage + // size of all the ancestor DvObjectContainers: + if (dataFile.getFilesize() > 0) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), dataFile.getFilesize()); + } + } else { logger.warning("Ingest failure (failed to save the tabular data in the database; file left intact as uploaded)."); return false; } @@ -1052,6 +1181,7 @@ public boolean ingestAsTabular(Long datafile_id) { // Replace contents of the file with the tab-delimited data produced: dataAccess.savePath(Paths.get(tabFile.getAbsolutePath())); + // Reset the file size: dataFile.setFilesize(dataAccess.getSize()); @@ -2177,7 +2307,7 @@ public static void main(String[] args) { TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { System.err.println("Caught an exception trying to ingest file "+file+"."); System.exit(1); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/metadataextraction/spi/FileMetadataExtractorSpi.java b/src/main/java/edu/harvard/iq/dataverse/ingest/metadataextraction/spi/FileMetadataExtractorSpi.java index ab8f610cb06..a30dfafe67f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/metadataextraction/spi/FileMetadataExtractorSpi.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/metadataextraction/spi/FileMetadataExtractorSpi.java @@ -10,9 +10,7 @@ import java.util.logging.*; import java.io.*; -import edu.harvard.iq.dataverse.ingest.plugin.spi.RegisterableService; import edu.harvard.iq.dataverse.ingest.plugin.spi.IngestServiceProvider; -import edu.harvard.iq.dataverse.ingest.plugin.spi.ServiceRegistry; import java.nio.MappedByteBuffer; import java.util.Locale; @@ -44,22 +42,6 @@ public FileMetadataExtractorSpi(String vendorName, String version) { this.version = version; } - public void onRegistration(ServiceRegistry registry, - Class category) {} - - - public void onDeregistration(ServiceRegistry registry, - Class category) {} - - public String getVersion() { - return version; - } - - public String getVendorName() { - return vendorName; - } - - public abstract String getDescription(Locale locale); protected String[] names = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/DigraphNode.java 
b/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/DigraphNode.java deleted file mode 100644 index 4db48b5c06a..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/DigraphNode.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - Copyright (C) 2005-2012, by the President and Fellows of Harvard College. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - Dataverse Network - A web application to share, preserve and analyze research data. - Developed at the Institute for Quantitative Social Science, Harvard University. - Version 3.0. -*/ - -package edu.harvard.iq.dataverse.ingest.plugin.spi; - -// This file was Taken out from openjdk-6-src-b16-24_apr_2009.tar.gz -// http://download.java.net/openjdk/jdk6/promoted/b16/openjdk-6-src-b16-24_apr_2009.tar.gz -// downloaded: 2009-05-07 - - -/* - * Copyright 2000 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - */ - -//package javax.imageio.spi; - -import java.io.Serializable; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; - -/** - * A node in a directed graph. In addition to an arbitrary - * Object containing user data associated with the node, - * each node maintains a Sets of nodes which are pointed - * to by the current node (available from getOutNodes). - * The in-degree of the node (that is, number of nodes that point to - * the current node) may be queried. - * - */ -class DigraphNode implements Cloneable, Serializable { - - /** The data associated with this node. */ - protected Object data; - - /** - * A Set of neighboring nodes pointed to by this - * node. - */ - protected Set outNodes = new HashSet(); - - /** The in-degree of the node. */ - protected int inDegree = 0; - - /** - * A Set of neighboring nodes that point to this - * node. 
- */ - private Set inNodes = new HashSet(); - - public DigraphNode(Object data) { - this.data = data; - } - - /** Returns the Object referenced by this node. */ - public Object getData() { - return data; - } - - /** - * Returns an Iterator containing the nodes pointed - * to by this node. - */ - public Iterator getOutNodes() { - return outNodes.iterator(); - } - - /** - * Adds a directed edge to the graph. The outNodes list of this - * node is updated and the in-degree of the other node is incremented. - * - * @param node a DigraphNode. - * - * @return true if the node was not previously the - * target of an edge. - */ - public boolean addEdge(DigraphNode node) { - if (outNodes.contains(node)) { - return false; - } - - outNodes.add(node); - node.inNodes.add(this); - node.incrementInDegree(); - return true; - } - - /** - * Returns true if an edge exists between this node - * and the given node. - * - * @param node a DigraphNode. - * - * @return true if the node is the target of an edge. - */ - public boolean hasEdge(DigraphNode node) { - return outNodes.contains(node); - } - - /** - * Removes a directed edge from the graph. The outNodes list of this - * node is updated and the in-degree of the other node is decremented. - * - * @return true if the node was previously the target - * of an edge. - */ - public boolean removeEdge(DigraphNode node) { - if (!outNodes.contains(node)) { - return false; - } - - outNodes.remove(node); - node.inNodes.remove(this); - node.decrementInDegree(); - return true; - } - - /** - * Removes this node from the graph, updating neighboring nodes - * appropriately. - */ - public void dispose() { - Object[] inNodesArray = inNodes.toArray(); - for(int i=0; iServiceRegistry. - * - * @param registry the ServiceRegistry instance. - * @param category a Class object that indicatges - * its registry category under which this object has been registered. - * category. - */ - public void onRegistration(ServiceRegistry registry, - Class category) {} - - /** - * A callback whenever this Spi class is deregistered from - * a ServiceRegistry. - * - * @param registry the ServiceRegistry instance. - * @param category a Class object that indicatges - * its registry category from which this object is being de-registered. - */ - public void onDeregistration(ServiceRegistry registry, - Class category) {} /** * Gets the value of the version field. * diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/PartiallyOrderedSet.java b/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/PartiallyOrderedSet.java deleted file mode 100644 index 87f4f57cdb6..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/PartiallyOrderedSet.java +++ /dev/null @@ -1,241 +0,0 @@ -/* - Copyright (C) 2005-2012, by the President and Fellows of Harvard College. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - Dataverse Network - A web application to share, preserve and analyze research data. - Developed at the Institute for Quantitative Social Science, Harvard University. - Version 3.0. 
-*/ - -package edu.harvard.iq.dataverse.ingest.plugin.spi; - -// This file was Taken out from openjdk-6-src-b16-24_apr_2009.tar.gz -// http://download.java.net/openjdk/jdk6/promoted/b16/openjdk-6-src-b16-24_apr_2009.tar.gz -// downloaded: 2009-05-07 - - -/* - * Copyright 2000 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - */ - -//package javax.imageio.spi; - -import java.util.AbstractSet; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.Map; -import java.util.Set; - -/** - * A set of Objects with pairwise orderings between them. - * The iterator method provides the elements in - * topologically sorted order. Elements participating in a cycle - * are not returned. - * - * Unlike the SortedSet and SortedMap - * interfaces, which require their elements to implement the - * Comparable interface, this class receives ordering - * information via its setOrdering and - * unsetPreference methods. This difference is due to - * the fact that the relevant ordering between elements is unlikely to - * be inherent in the elements themselves; rather, it is set - * dynamically accoring to application policy. For example, in a - * service provider registry situation, an application might allow the - * user to set a preference order for service provider objects - * supplied by a trusted vendor over those supplied by another. - * - */ -class PartiallyOrderedSet extends AbstractSet { - - // The topological sort (roughly) follows the algorithm described in - // Horowitz and Sahni, _Fundamentals of Data Structures_ (1976), - // p. 315. - - // Maps Objects to DigraphNodes that contain them - private Map poNodes = new HashMap(); - - // The set of Objects - private Set nodes = poNodes.keySet(); - - /** - * Constructs a PartiallyOrderedSet. - */ - public PartiallyOrderedSet() {} - - public int size() { - return nodes.size(); - } - - public boolean contains(Object o) { - return nodes.contains(o); - } - - /** - * Returns an iterator over the elements contained in this - * collection, with an ordering that respects the orderings set - * by the setOrdering method. - */ - public Iterator iterator() { - return new PartialOrderIterator(poNodes.values().iterator()); - } - - /** - * Adds an Object to this - * PartiallyOrderedSet. 
- */ - public boolean add(Object o) { - if (nodes.contains(o)) { - return false; - } - - DigraphNode node = new DigraphNode(o); - poNodes.put(o, node); - return true; - } - - /** - * Removes an Object from this - * PartiallyOrderedSet. - */ - public boolean remove(Object o) { - DigraphNode node = (DigraphNode)poNodes.get(o); - if (node == null) { - return false; - } - - poNodes.remove(o); - node.dispose(); - return true; - } - - public void clear() { - poNodes.clear(); - } - - /** - * Sets an ordering between two nodes. When an iterator is - * requested, the first node will appear earlier in the - * sequence than the second node. If a prior ordering existed - * between the nodes in the opposite order, it is removed. - * - * @return true if no prior ordering existed - * between the nodes, falseotherwise. - */ - public boolean setOrdering(Object first, Object second) { - DigraphNode firstPONode = - (DigraphNode)poNodes.get(first); - DigraphNode secondPONode = - (DigraphNode)poNodes.get(second); - - secondPONode.removeEdge(firstPONode); - return firstPONode.addEdge(secondPONode); - } - - /** - * Removes any ordering between two nodes. - * - * @return true if a prior prefence existed between the nodes. - */ - public boolean unsetOrdering(Object first, Object second) { - DigraphNode firstPONode = - (DigraphNode)poNodes.get(first); - DigraphNode secondPONode = - (DigraphNode)poNodes.get(second); - - return firstPONode.removeEdge(secondPONode) || - secondPONode.removeEdge(firstPONode); - } - - /** - * Returns true if an ordering exists between two - * nodes. - */ - public boolean hasOrdering(Object preferred, Object other) { - DigraphNode preferredPONode = - (DigraphNode)poNodes.get(preferred); - DigraphNode otherPONode = - (DigraphNode)poNodes.get(other); - - return preferredPONode.hasEdge(otherPONode); - } -} - -class PartialOrderIterator implements Iterator { - - LinkedList zeroList = new LinkedList(); - Map inDegrees = new HashMap(); // DigraphNode -> Integer - - public PartialOrderIterator(Iterator iter) { - // Initialize scratch in-degree values, zero list - while (iter.hasNext()) { - DigraphNode node = (DigraphNode)iter.next(); - int inDegree = node.getInDegree(); - inDegrees.put(node, new Integer(inDegree)); - - // Add nodes with zero in-degree to the zero list - if (inDegree == 0) { - zeroList.add(node); - } - } - } - - public boolean hasNext() { - return !zeroList.isEmpty(); - } - - public Object next() { - DigraphNode first = (DigraphNode)zeroList.removeFirst(); - - // For each out node of the output node, decrement its in-degree - Iterator outNodes = first.getOutNodes(); - while (outNodes.hasNext()) { - DigraphNode node = (DigraphNode)outNodes.next(); - int inDegree = ((Integer)inDegrees.get(node)).intValue() - 1; - inDegrees.put(node, new Integer(inDegree)); - - // If the in-degree has fallen to 0, place the node on the list - if (inDegree == 0) { - zeroList.add(node); - } - } - - return first.getData(); - } - - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/RegisterableService.java b/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/RegisterableService.java deleted file mode 100644 index d3609b1e4b9..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/RegisterableService.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - Copyright (C) 2005-2012, by the President and Fellows of Harvard College. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - Dataverse Network - A web application to share, preserve and analyze research data. - Developed at the Institute for Quantitative Social Science, Harvard University. - Version 3.0. -*/ - -package edu.harvard.iq.dataverse.ingest.plugin.spi; - -// This file was Taken out from openjdk-6-src-b16-24_apr_2009.tar.gz -// http://download.java.net/openjdk/jdk6/promoted/b16/openjdk-6-src-b16-24_apr_2009.tar.gz -// downloaded: 2009-05-07 - - -/* - * Copyright 2000-2004 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - */ - - -/** - * An optional interface that may be provided by service provider - * objects that will be registered with a - * ServiceRegistry. If this interface is present, - * notification of registration and deregistration will be performed. - * - * @see ServiceRegistry - * - */ -public interface RegisterableService { - - /** - * Called when an object implementing this interface is added to - * the given category of the given - * registry. The object may already be registered - * under another category or categories. - * - * @param registry a ServiceRegistry where this - * object has been registered. - * @param category a Class object indicating the - * registry category under which this object has been registered. - */ - void onRegistration(ServiceRegistry registry, Class category); - - /** - * Called when an object implementing this interface is removed - * from the given category of the given - * registry. The object may still be registered - * under another category or categories. - * - * @param registry a ServiceRegistry from which this - * object is being (wholly or partially) deregistered. - * @param category a Class object indicating the - * registry category from which this object is being deregistered. 
- */ - void onDeregistration(ServiceRegistry registry, Class category); -} diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/ServiceRegistry.java b/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/ServiceRegistry.java deleted file mode 100644 index 1794adb5de2..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/plugin/spi/ServiceRegistry.java +++ /dev/null @@ -1,861 +0,0 @@ -/* - Copyright (C) 2005-2012, by the President and Fellows of Harvard College. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - Dataverse Network - A web application to share, preserve and analyze research data. - Developed at the Institute for Quantitative Social Science, Harvard University. - Version 3.0. -*/ -package edu.harvard.iq.dataverse.ingest.plugin.spi; - - - -// This file was Taken out from openjdk-6-src-b16-24_apr_2009.tar.gz -// http://download.java.net/openjdk/jdk6/promoted/b16/openjdk-6-src-b16-24_apr_2009.tar.gz -// downloaded: 2009-05-07 - - -/* - * Copyright 2000-2007 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - */ - -//package javax.imageio.spi; - -import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Set; -import java.util.ServiceLoader; - -/** - * A registry for service provider instances. - * - *
          A service is a well-known set of interfaces and (usually - * abstract) classes. A service provider is a specific - * implementation of a service. The classes in a provider typically - * implement the interface or subclass the class defined by the - * service itself. - * - *
          Service providers are stored in one or more categories, - * each of which is defined by a class of interface (described by a - * Class object) that all of its members must implement. - * The set of categories may be changed dynamically. - * - *
          Only a single instance of a given leaf class (that is, the - * actual class returned by getClass(), as opposed to any - * inherited classes or interfaces) may be registered. That is, - * suppose that the - * com.mycompany.mypkg.GreenServiceProvider class - * implements the com.mycompany.mypkg.MyService - * interface. If a GreenServiceProvider instance is - * registered, it will be stored in the category defined by the - * MyService class. If a new instance of - * GreenServiceProvider is registered, it will replace - * the previous instance. In practice, service provider objects are - * usually singletons so this behavior is appropriate. - * - *
          To declare a service provider, a services - * subdirectory is placed within the META-INF directory - * that is present in every JAR file. This directory contains a file - * for each service provider interface that has one or more - * implementation classes present in the JAR file. For example, if - * the JAR file contained a class named - * com.mycompany.mypkg.MyServiceImpl which implements the - * javax.someapi.SomeService interface, the JAR file - * would contain a file named:
- * META-INF/services/javax.someapi.SomeService - * - * containing the line: - * - * - * com.mycompany.mypkg.MyService - * - * - *
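The provider-configuration file shown in the example above is the mechanism implemented by java.util.ServiceLoader, which the lookupProviders() helpers further down in this deleted class simply delegate to. A minimal usage sketch, reusing the hypothetical MyService name from the javadoc example:

// Minimal sketch of the java.util.ServiceLoader lookup that the deleted
// lookupProviders() helpers delegate to. MyService is the hypothetical
// interface name from the javadoc example; the provider JAR would carry a
// META-INF/services entry naming the implementation class, as described above.
import java.util.ServiceLoader;

public class ProviderLookupSketch {

    public interface MyService {
        String describe();
    }

    public static void main(String[] args) {
        for (MyService provider : ServiceLoader.load(MyService.class)) {
            System.out.println(provider.describe());
        }
    }
}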
          The service provider classes should be to be lightweight and - * quick to load. Implementations of these interfaces should avoid - * complex dependencies on other classes and on native code. The usual - * pattern for more complex services is to register a lightweight - * proxy for the heavyweight service. - * - *
          An application may customize the contents of a registry as it - * sees fit, so long as it has the appropriate runtime permission. - * - *
          For more details on declaring service providers, and the JAR - * format in general, see the - * JAR File Specification. - * - * @see RegisterableService - * - */ -public class ServiceRegistry { - - // Class -> Registry - private Map, SubRegistry> categoryMap = new HashMap<>(); - - /** - * Constructs a ServiceRegistry instance with a - * set of categories taken from the categories - * argument. - * - * @param categories an Iterator containing - * Class objects to be used to define categories. - * - * @exception IllegalArgumentException if - * categories is null. - */ - public ServiceRegistry(Iterator> categories) { - if (categories == null) { - throw new IllegalArgumentException("categories == null!"); - } - while (categories.hasNext()) { - Class category = categories.next(); - SubRegistry reg = new SubRegistry(this, category); - categoryMap.put(category, reg); - } - } - - // The following two methods expose functionality from - // sun.misc.Service. If that class is made public, they may be - // removed. - // - // The sun.misc.ServiceConfigurationError class may also be - // exposed, in which case the references to 'an - // Error' below should be changed to 'a - // ServiceConfigurationError'. - - /** - * Searches for implementations of a particular service class - * using the given class loader. - * - *
          This method transforms the name of the given service class - * into a provider-configuration filename as described in the - * class comment and then uses the getResources - * method of the given class loader to find all available files - * with that name. These files are then read and parsed to - * produce a list of provider-class names. The iterator that is - * returned uses the given class loader to look up and then - * instantiate each element of the list. - * - *
          Because it is possible for extensions to be installed into - * a running Java virtual machine, this method may return - * different results each time it is invoked. - * - * @param providerClass a Classobject indicating the - * class or interface of the service providers being detected. - * - * @param loader the class loader to be used to load - * provider-configuration files and instantiate provider classes, - * or null if the system class loader (or, failing that - * the bootstrap class loader) is to be used. - * - * @return An Iterator that yields provider objects - * for the given service, in some arbitrary order. The iterator - * will throw an Error if a provider-configuration - * file violates the specified format or if a provider class - * cannot be found and instantiated. - * - * @exception IllegalArgumentException if - * providerClass is null. - */ - public static Iterator lookupProviders(Class providerClass, - ClassLoader loader) - { - if (providerClass == null) { - throw new IllegalArgumentException("providerClass == null!"); - } - return ServiceLoader.load(providerClass, loader).iterator(); - } - - /** - * Locates and incrementally instantiates the available providers - * of a given service using the context class loader. This - * convenience method is equivalent to: - * - *
-     *   ClassLoader cl = Thread.currentThread().getContextClassLoader(); -     *   return Service.providers(service, cl); -     *
          - * - * @param providerClass a Classobject indicating the - * class or interface of the service providers being detected. - * - * @return An Iterator that yields provider objects - * for the given service, in some arbitrary order. The iterator - * will throw an Error if a provider-configuration - * file violates the specified format or if a provider class - * cannot be found and instantiated. - * - * @exception IllegalArgumentException if - * providerClass is null. - */ - public static Iterator lookupProviders(Class providerClass) { - if (providerClass == null) { - throw new IllegalArgumentException("providerClass == null!"); - } - return ServiceLoader.load(providerClass).iterator(); - } - - /** - * Returns an Iterator of Class objects - * indicating the current set of categories. The iterator will be - * empty if no categories exist. - * - * @return an Iterator containing - * Classobjects. - */ - public Iterator> getCategories() { - Set> keySet = categoryMap.keySet(); - return keySet.iterator(); - } - - /** - * Returns an Iterator containing the subregistries to which the - * provider belongs. - */ - private Iterator getSubRegistries(Object provider) { - List l = new ArrayList<>(); - Iterator> iter = categoryMap.keySet().iterator(); - while (iter.hasNext()) { - Class c = iter.next(); - if (c.isAssignableFrom(provider.getClass())) { - l.add(categoryMap.get(c)); - } - } - return l.iterator(); - } - - /** - * Adds a service provider object to the registry. The provider - * is associated with the given category. - * - *
          If provider implements the - * RegisterableService interface, its - * onRegistration method will be called. Its - * onDeregistration method will be called each time - * it is deregistered from a category, for example if a - * category is removed or the registry is garbage collected. - * - * @param provider the service provide object to be registered. - * @param category the category under which to register the - * provider. - * - * @return true if no provider of the same class was previously - * registered in the same category category. - * - * @exception IllegalArgumentException if provider is - * null. - * @exception IllegalArgumentException if there is no category - * corresponding to category. - * @exception ClassCastException if provider does not implement - * the Class defined by category. - */ - public boolean registerServiceProvider(T provider, - Class category) { - if (provider == null) { - throw new IllegalArgumentException("provider == null!"); - } - SubRegistry reg = categoryMap.get(category); - if (reg == null) { - throw new IllegalArgumentException("category unknown!"); - } - if (!category.isAssignableFrom(provider.getClass())) { - throw new ClassCastException(); - } - - return reg.registerServiceProvider(provider); - } - - /** - * Adds a service provider object to the registry. The provider - * is associated within each category present in the registry - * whose Class it implements. - * - *
          If provider implements the - * RegisterableService interface, its - * onRegistration method will be called once for each - * category it is registered under. Its - * onDeregistration method will be called each time - * it is deregistered from a category or when the registry is - * finalized. - * - * @param provider the service provider object to be registered. - * - * @exception IllegalArgumentException if - * provider is null. - */ - public void registerServiceProvider(Object provider) { - if (provider == null) { - throw new IllegalArgumentException("provider == null!"); - } - Iterator regs = getSubRegistries(provider); - while (regs.hasNext()) { - SubRegistry reg = regs.next(); - reg.registerServiceProvider(provider); - } - } - - /** - * Adds a set of service provider objects, taken from an - * Iterator to the registry. Each provider is - * associated within each category present in the registry whose - * Class it implements. - * - *
          For each entry of providers that implements - * the RegisterableService interface, its - * onRegistration method will be called once for each - * category it is registered under. Its - * onDeregistration method will be called each time - * it is deregistered from a category or when the registry is - * finalized. - * - * @param providers an Iterator containing service provider - * objects to be registered. - * - * @exception IllegalArgumentException if providers - * is null or contains a null entry. - */ - public void registerServiceProviders(Iterator providers) { - if (providers == null) { - throw new IllegalArgumentException("provider == null!"); - } - while (providers.hasNext()) { - registerServiceProvider(providers.next()); - } - } - - /** - * Removes a service provider object from the given category. If - * the provider was not previously registered, nothing happens and - * false is returned. Otherwise, true - * is returned. If an object of the same class as - * provider but not equal (using ==) to - * provider is registered, it will not be - * deregistered. - * - *
          If provider implements the - * RegisterableService interface, its - * onDeregistration method will be called. - * - * @param provider the service provider object to be deregistered. - * @param category the category from which to deregister the - * provider. - * - * @return true if the provider was previously - * registered in the same category category, - * false otherwise. - * - * @exception IllegalArgumentException if provider is - * null. - * @exception IllegalArgumentException if there is no category - * corresponding to category. - * @exception ClassCastException if provider does not implement - * the class defined by category. - */ - public boolean deregisterServiceProvider(T provider, - Class category) { - if (provider == null) { - throw new IllegalArgumentException("provider == null!"); - } - SubRegistry reg = categoryMap.get(category); - if (reg == null) { - throw new IllegalArgumentException("category unknown!"); - } - if (!category.isAssignableFrom(provider.getClass())) { - throw new ClassCastException(); - } - return reg.deregisterServiceProvider(provider); - } - - /** - * Removes a service provider object from all categories that - * contain it. - * - * @param provider the service provider object to be deregistered. - * - * @exception IllegalArgumentException if provider is - * null. - */ - public void deregisterServiceProvider(Object provider) { - if (provider == null) { - throw new IllegalArgumentException("provider == null!"); - } - Iterator regs = getSubRegistries(provider); - while (regs.hasNext()) { - SubRegistry reg = regs.next(); - reg.deregisterServiceProvider(provider); - } - } - - /** - * Returns true if provider is currently - * registered. - * - * @param provider the service provider object to be queried. - * - * @return true if the given provider has been - * registered. - * - * @exception IllegalArgumentException if provider is - * null. - */ - public boolean contains(Object provider) { - if (provider == null) { - throw new IllegalArgumentException("provider == null!"); - } - Iterator regs = getSubRegistries(provider); - while (regs.hasNext()) { - SubRegistry reg = regs.next(); - if (reg.contains(provider)) { - return true; - } - } - - return false; - } - - /** - * Returns an Iterator containing all registered - * service providers in the given category. If - * useOrdering is false, the iterator - * will return all of the server provider objects in an arbitrary - * order. Otherwise, the ordering will respect any pairwise - * orderings that have been set. If the graph of pairwise - * orderings contains cycles, any providers that belong to a cycle - * will not be returned. - * - * @param category the category to be retrieved from. - * @param useOrdering true if pairwise orderings - * should be taken account in ordering the returned objects. - * - * @return an Iterator containing service provider - * objects from the given category, possibly in order. - * - * @exception IllegalArgumentException if there is no category - * corresponding to category. - */ - public Iterator getServiceProviders(Class category, - boolean useOrdering) { - SubRegistry reg = categoryMap.get(category); - if (reg == null) { - throw new IllegalArgumentException("category unknown!"); - } - return reg.getServiceProviders(useOrdering); - } - - /** - * A simple filter interface used by - * ServiceRegistry.getServiceProviders to select - * providers matching an arbitrary criterion. 
Classes that - * implement this interface should be defined in order to make use - * of the getServiceProviders method of - * ServiceRegistry that takes a Filter. - * - * @see ServiceRegistry#getServiceProviders(Class, ServiceRegistry.Filter, boolean) - */ - public interface Filter { - - /** - * Returns true if the given - * provider object matches the criterion defined - * by this Filter. - * - * @param provider a service provider Object. - * - * @return true if the provider matches the criterion. - */ - boolean filter(Object provider); - } - - /** - * Returns an Iterator containing service provider - * objects within a given category that satisfy a criterion - * imposed by the supplied ServiceRegistry.Filter - * object's filter method. - * - *

          The useOrdering argument controls the - * ordering of the results using the same rules as - * getServiceProviders(Class, boolean). - * - * @param category the category to be retrieved from. - * @param filter an instance of ServiceRegistry.Filter - * whose filter method will be invoked. - * @param useOrdering true if pairwise orderings - * should be taken account in ordering the returned objects. - * - * @return an Iterator containing service provider - * objects from the given category, possibly in order. - * - * @exception IllegalArgumentException if there is no category - * corresponding to category. - */ - public Iterator getServiceProviders(Class category, - Filter filter, - boolean useOrdering) { - SubRegistry reg = categoryMap.get(category); - if (reg == null) { - throw new IllegalArgumentException("category unknown!"); - } - Iterator iter = getServiceProviders(category, useOrdering); - return new FilterIterator(iter, filter); - } - - /** - * Returns the currently registered service provider object that - * is of the given class type. At most one object of a given - * class is allowed to be registered at any given time. If no - * registered object has the desired class type, null - * is returned. - * - * @param providerClass the Class of the desired - * service provider object. - * - * @return a currently registered service provider object with the - * desired Classtype, or null is none is - * present. - * - * @exception IllegalArgumentException if providerClass is - * null. - */ - public T getServiceProviderByClass(Class providerClass) { - if (providerClass == null) { - throw new IllegalArgumentException("providerClass == null!"); - } - for (Class c : categoryMap.keySet()) { - if (c.isAssignableFrom(providerClass)) { - SubRegistry reg = (SubRegistry)categoryMap.get(c); - T provider = reg.getServiceProviderByClass(providerClass); - if (provider != null) { - return provider; - } - } - } - return null; - } - - /** - * Sets a pairwise ordering between two service provider objects - * within a given category. If one or both objects are not - * currently registered within the given category, or if the - * desired ordering is already set, nothing happens and - * false is returned. If the providers previously - * were ordered in the reverse direction, that ordering is - * removed. - * - *

          The ordering will be used by the - * getServiceProviders methods when their - * useOrdering argument is true. - * - * @param category a Class object indicating the - * category under which the preference is to be established. - * @param firstProvider the preferred provider. - * @param secondProvider the provider to which - * firstProvider is preferred. - * - * @return true if a previously unset ordering - * was established. - * - * @exception IllegalArgumentException if either provider is - * null or they are the same object. - * @exception IllegalArgumentException if there is no category - * corresponding to category. - */ - public boolean setOrdering(Class category, - T firstProvider, - T secondProvider) { - if (firstProvider == null || secondProvider == null) { - throw new IllegalArgumentException("provider is null!"); - } - if (firstProvider == secondProvider) { - throw new IllegalArgumentException("providers are the same!"); - } - SubRegistry reg = (SubRegistry)categoryMap.get(category); - if (reg == null) { - throw new IllegalArgumentException("category unknown!"); - } - if (reg.contains(firstProvider) && - reg.contains(secondProvider)) { - return reg.setOrdering(firstProvider, secondProvider); - } - return false; - } - - /** - * Sets a pairwise ordering between two service provider objects - * within a given category. If one or both objects are not - * currently registered within the given category, or if no - * ordering is currently set between them, nothing happens - * and false is returned. - * - *

          The ordering will be used by the - * getServiceProviders methods when their - * useOrdering argument is true. - * - * @param category a Class object indicating the - * category under which the preference is to be disestablished. - * @param firstProvider the formerly preferred provider. - * @param secondProvider the provider to which - * firstProvider was formerly preferred. - * - * @return true if a previously set ordering was - * disestablished. - * - * @exception IllegalArgumentException if either provider is - * null or they are the same object. - * @exception IllegalArgumentException if there is no category - * corresponding to category. - */ - public boolean unsetOrdering(Class category, - T firstProvider, - T secondProvider) { - if (firstProvider == null || secondProvider == null) { - throw new IllegalArgumentException("provider is null!"); - } - if (firstProvider == secondProvider) { - throw new IllegalArgumentException("providers are the same!"); - } - SubRegistry reg = (SubRegistry)categoryMap.get(category); - if (reg == null) { - throw new IllegalArgumentException("category unknown!"); - } - if (reg.contains(firstProvider) && - reg.contains(secondProvider)) { - return reg.unsetOrdering(firstProvider, secondProvider); - } - return false; - } - - /** - * Deregisters all service provider object currently registered - * under the given category. - * - * @param category the category to be emptied. - * - * @exception IllegalArgumentException if there is no category - * corresponding to category. - */ - public void deregisterAll(Class category) { - SubRegistry reg = (SubRegistry)categoryMap.get(category); - if (reg == null) { - throw new IllegalArgumentException("category unknown!"); - } - reg.clear(); - } - - /** - * Deregisters all currently registered service providers from all - * categories. - */ - public void deregisterAll() { - Iterator iter = categoryMap.values().iterator(); - while (iter.hasNext()) { - SubRegistry reg = (SubRegistry)iter.next(); - reg.clear(); - } - } - - /** - * Finalizes this object prior to garbage collection. The - * deregisterAll method is called to deregister all - * currently registered service providers. This method should not - * be called from application code. - * - * @exception Throwable if an error occurs during superclass - * finalization. - */ - public void finalize() throws Throwable { - deregisterAll(); - super.finalize(); - } -} - - -/** - * A portion of a registry dealing with a single superclass or - * interface. - */ -class SubRegistry { - - ServiceRegistry registry; - - Class category; - - // Provider Objects organized by partial oridering - PartiallyOrderedSet poset = new PartiallyOrderedSet(); - - // Class -> Provider Object of that class - Map,Object> map = new HashMap<>(); - - public SubRegistry(ServiceRegistry registry, Class category) { - this.registry = registry; - this.category = category; - } - - public boolean registerServiceProvider(Object provider) { - Object oprovider = map.get(provider.getClass()); - boolean present = oprovider != null; - - if (present) { - deregisterServiceProvider(oprovider); - } - map.put(provider.getClass(), provider); - poset.add(provider); - if (provider instanceof RegisterableService) { - RegisterableService rs = (RegisterableService)provider; - rs.onRegistration(registry, category); - } - - return !present; - } - - /** - * If the provider was not previously registered, do nothing. - * - * @return true if the provider was previously registered. 
- */ - public boolean deregisterServiceProvider(Object provider) { - Object oprovider = map.get(provider.getClass()); - - if (provider == oprovider) { - map.remove(provider.getClass()); - poset.remove(provider); - if (provider instanceof RegisterableService) { - RegisterableService rs = (RegisterableService)provider; - rs.onDeregistration(registry, category); - } - - return true; - } - return false; - } - - public boolean contains(Object provider) { - Object oprovider = map.get(provider.getClass()); - return oprovider == provider; - } - - public boolean setOrdering(Object firstProvider, - Object secondProvider) { - return poset.setOrdering(firstProvider, secondProvider); - } - - public boolean unsetOrdering(Object firstProvider, - Object secondProvider) { - return poset.unsetOrdering(firstProvider, secondProvider); - } - - public Iterator getServiceProviders(boolean useOrdering) { - if (useOrdering) { - return poset.iterator(); - } else { - return map.values().iterator(); - } - } - - public T getServiceProviderByClass(Class providerClass) { - return (T)map.get(providerClass); - } - - public void clear() { - Iterator iter = map.values().iterator(); - while (iter.hasNext()) { - Object provider = iter.next(); - iter.remove(); - - if (provider instanceof RegisterableService) { - RegisterableService rs = (RegisterableService)provider; - rs.onDeregistration(registry, category); - } - } - poset.clear(); - } - - public void finalize() { - clear(); - } -} - - -/** - * A class for wrapping Iterators with a filter function. - * This provides an iterator for a subset without duplication. - */ -class FilterIterator implements Iterator { - - private Iterator iter; - private ServiceRegistry.Filter filter; - - private T next = null; - - public FilterIterator(Iterator iter, - ServiceRegistry.Filter filter) { - this.iter = iter; - this.filter = filter; - advance(); - } - - private void advance() { - while (iter.hasNext()) { - T elt = iter.next(); - if (filter.filter(elt)) { - next = elt; - return; - } - } - - next = null; - } - - public boolean hasNext() { - return next != null; - } - - public T next() { - if (next == null) { - throw new NoSuchElementException(); - } - T o = next; - advance(); - return o; - } - - public void remove() { - throw new UnsupportedOperationException(); - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java index 223b171dfb5..0f23a3d9781 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java @@ -20,10 +20,13 @@ package edu.harvard.iq.dataverse.ingest.tabulardata; +import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.ingest.tabulardata.spi.*; //import edu.harvard.iq.dataverse.ingest.plugin.metadata.*; import java.io.*; import static java.lang.System.*; +import java.util.Iterator; +import java.util.List; import java.util.regex.Matcher; /** @@ -98,7 +101,7 @@ public void setDataLanguageEncoding(String dataLanguageEncoding) { * * @throws java.io.IOException if a reading error occurs. 
*/ - public abstract TabularDataIngest read(BufferedInputStream stream, File dataFile) + public abstract TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException; @@ -176,5 +179,26 @@ protected String escapeCharacterString(String rawString) { return escapedString; } + + protected String generateVariableHeader(List dvs) { + String varHeader = null; + + if (dvs != null) { + Iterator iter = dvs.iterator(); + DataVariable dv; + + if (iter.hasNext()) { + dv = iter.next(); + varHeader = dv.getName(); + } + + while (iter.hasNext()) { + dv = iter.next(); + varHeader = varHeader + "\t" + dv.getName(); + } + } + + return varHeader; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java index 57f76df3802..f8816ababb4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java @@ -110,7 +110,7 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. */ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariableHeader, File dataFile) throws IOException { init(); if (stream == null) { @@ -124,7 +124,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws File tabFileDestination = File.createTempFile("data-", ".tab"); PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath()); - int lineCount = readFile(localBufferedReader, dataTable, tabFileWriter); + int lineCount = readFile(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter); logger.fine("Tab file produced: " + tabFileDestination.getAbsolutePath()); @@ -136,14 +136,17 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws } - public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException { + public int readFile(BufferedReader csvReader, DataTable dataTable, boolean saveWithVariableHeader, PrintWriter finalOut) throws IOException { List variableList = new ArrayList<>(); CSVParser parser = new CSVParser(csvReader, inFormat.withHeader()); Map headers = parser.getHeaderMap(); int i = 0; + String variableNameHeader = null; + for (String varName : headers.keySet()) { + // @todo: is .keySet() guaranteed to return the names in the right order? if (varName == null || varName.isEmpty()) { // TODO: // Add a sensible variable name validation algorithm. @@ -158,6 +161,13 @@ public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter f dv.setTypeCharacter(); dv.setIntervalDiscrete(); + + if (saveWithVariableHeader) { + variableNameHeader = variableNameHeader == null + ? 
varName + : variableNameHeader.concat("\t" + varName); + } + i++; } @@ -342,6 +352,14 @@ public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter f try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) { parser = new CSVParser(secondPassReader, inFormat.withHeader()); String[] caseRow = new String[headers.size()]; + + // Save the variable name header, if requested + if (saveWithVariableHeader) { + if (variableNameHeader == null) { + throw new IOException("failed to generate the Variable Names header"); + } + finalOut.println(variableNameHeader); + } for (CSVRecord record : parser) { if (!record.isConsistent()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java index 2dec701592e..73818f8fb62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java @@ -505,7 +505,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { dbgLog.info("***** DTAFileReader: read() start *****"); if (dataFile != null) { @@ -519,7 +519,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws if (releaseNumber!=104) { decodeExpansionFields(stream); } - decodeData(stream); + decodeData(stream, storeWithVariableHeader); decodeValueLabels(stream); ingesteddata.setDataTable(dataTable); @@ -1665,7 +1665,7 @@ private void parseValueLabelsReleasel108(BufferedInputStream stream) throws IOEx dbgLog.fine("parseValueLabelsRelease108(): end"); } - private void decodeData(BufferedInputStream stream) throws IOException { + private void decodeData(BufferedInputStream stream, boolean saveWithVariableHeader) throws IOException { dbgLog.fine("\n***** decodeData(): start *****"); @@ -1719,6 +1719,11 @@ private void decodeData(BufferedInputStream stream) throws IOException { BUT, this needs to be reviewed/confirmed etc! */ //String[][] dateFormat = new String[nvar][nobs]; + + // add the variable header here, if needed + if (saveWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } for (int i = 0; i < nobs; i++) { byte[] dataRowBytes = new byte[bytes_per_row]; diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java index 22581834676..53607d541de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java @@ -339,7 +339,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { logger.fine("NewDTAFileReader: read() start"); // shit ton of diagnostics (still) needed here!! -- L.A. 
@@ -363,7 +363,13 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws // "characteristics" - STATA-proprietary information // (we are skipping it) readCharacteristics(dataReader); - readData(dataReader); + + String variableHeaderLine = null; + + if (storeWithVariableHeader) { + variableHeaderLine = generateVariableHeader(dataTable.getDataVariables()); + } + readData(dataReader, variableHeaderLine); // (potentially) large, (potentially) non-ASCII character strings // saved outside the section, and referenced @@ -707,7 +713,7 @@ private void readCharacteristics(DataReader reader) throws IOException { } - private void readData(DataReader reader) throws IOException { + private void readData(DataReader reader, String variableHeaderLine) throws IOException { logger.fine("Data section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_data()); logger.fine("readData(): start"); reader.readOpeningTag(TAG_DATA); @@ -731,6 +737,11 @@ private void readData(DataReader reader) throws IOException { FileOutputStream fileOutTab = new FileOutputStream(tabDelimitedDataFile); PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); + // add the variable header here, if needed + if (variableHeaderLine != null) { + pwout.println(variableHeaderLine); + } + logger.fine("Beginning to read data stream."); for (int i = 0; i < nobs; i++) { @@ -999,6 +1010,8 @@ private void readSTRLs(DataReader reader) throws IOException { int nobs = dataTable.getCaseQuantity().intValue(); String[] line; + + //@todo: adjust for the case of storing the file with the variable header for (int obsindex = 0; obsindex < nobs; obsindex++) { if (scanner.hasNext()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java index c90b0ea6950..2ee966c3e31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java @@ -180,7 +180,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File additionalData) throws IOException{ + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File additionalData) throws IOException{ dbgLog.fine("PORFileReader: read() start"); if (additionalData != null) { @@ -226,7 +226,7 @@ public TabularDataIngest read(BufferedInputStream stream, File additionalData) t headerId = "8S"; } - decode(headerId, bfReader); + decode(headerId, bfReader, storeWithVariableHeader); // for last iteration @@ -382,7 +382,7 @@ public TabularDataIngest read(BufferedInputStream stream, File additionalData) t return ingesteddata; } - private void decode(String headerId, BufferedReader reader) throws IOException{ + private void decode(String headerId, BufferedReader reader, boolean storeWithVariableHeader) throws IOException{ if (headerId.equals("1")) decodeProductName(reader); else if (headerId.equals("2")) decodeLicensee(reader); else if (headerId.equals("3")) decodeFileLabel(reader); @@ -398,7 +398,7 @@ private void decode(String headerId, BufferedReader reader) throws IOException{ else if (headerId.equals("C")) decodeVariableLabel(reader); else if (headerId.equals("D")) decodeValueLabel(reader); else if (headerId.equals("E")) decodeDocument(reader); - else if 
(headerId.equals("F")) decodeData(reader); + else if (headerId.equals("F")) decodeData(reader, storeWithVariableHeader); } @@ -1099,7 +1099,7 @@ private void decodeDocument(BufferedReader reader) throws IOException { } - private void decodeData(BufferedReader reader) throws IOException { + private void decodeData(BufferedReader reader, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("decodeData(): start"); // TODO: get rid of this "variableTypeFinal"; -- L.A. 4.0 beta int[] variableTypeFinal= new int[varQnty]; @@ -1126,6 +1126,9 @@ private void decodeData(BufferedReader reader) throws IOException { // contents (variable) checker concering decimals Arrays.fill(variableTypeFinal, 0); + if (storeWithVariableHeader) { + pwout.println(StringUtils.join(variableNameList, "\t")); + } // raw-case counter int j = 0; // case diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index eb1353fd792..50f2f89e354 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -473,7 +473,7 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. */ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariableHeader, File dataFile) throws IOException { init(); @@ -509,7 +509,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws File tabFileDestination = File.createTempFile("data-", ".tab"); PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath(), "UTF-8"); - int lineCount = csvFileReader.read(localBufferedReader, dataTable, tabFileWriter); + int lineCount = csvFileReader.read(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter); LOG.fine("RDATAFileReader: successfully read "+lineCount+" lines of tab-delimited data."); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java index f60b7733463..fbe7e401b57 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java @@ -61,8 +61,8 @@ public RTabFileParser (char delimiterChar) { // should be used. - public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout) throws IOException { - dbgLog.warning("RTabFileParser: Inside R Tab file parser"); + public int read(BufferedReader csvReader, DataTable dataTable, boolean saveWithVariableHeader, PrintWriter pwout) throws IOException { + dbgLog.fine("RTabFileParser: Inside R Tab file parser"); int varQnty = 0; @@ -94,14 +94,17 @@ public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout boolean[] isTimeVariable = new boolean[varQnty]; boolean[] isBooleanVariable = new boolean[varQnty]; + String variableNameHeader = null; + if (dataTable.getDataVariables() != null) { for (int i = 0; i < varQnty; i++) { DataVariable var = dataTable.getDataVariables().get(i); if (var == null) { - // throw exception! 
+ throw new IOException ("null dataVariable passed to the parser"); + } if (var.getType() == null) { - // throw exception! + throw new IOException ("null dataVariable type passed to the parser"); } if (var.isTypeCharacter()) { isCharacterVariable[i] = true; @@ -128,13 +131,24 @@ public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout } } } else { - // throw excepion "unknown variable format type" - ? + throw new IOException ("unknown dataVariable format passed to the parser"); } - + if (saveWithVariableHeader) { + variableNameHeader = variableNameHeader == null + ? var.getName() + : variableNameHeader.concat("\t" + var.getName()); + } } } else { - // throw exception! + throw new IOException ("null dataVariables list passed to the parser"); + } + + if (saveWithVariableHeader) { + if (variableNameHeader == null) { + throw new IOException ("failed to generate the Variable Names header"); + } + pwout.println(variableNameHeader); } while ((line = csvReader.readLine()) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java index 682b8f1166c..5eecbdfb666 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java @@ -338,7 +338,7 @@ private void init() throws IOException { } } - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException{ + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException{ dbgLog.info("SAVFileReader: read() start"); if (dataFile != null) { @@ -422,7 +422,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws methodCurrentlyExecuted = "decodeRecordTypeData"; dbgLog.fine("***** SAVFileReader: executing method decodeRecordTypeData"); - decodeRecordTypeData(stream); + decodeRecordTypeData(stream, storeWithVariableHeader); } catch (IllegalArgumentException e) { @@ -2308,7 +2308,7 @@ void decodeRecordType999(BufferedInputStream stream) throws IOException { - void decodeRecordTypeData(BufferedInputStream stream) throws IOException { + void decodeRecordTypeData(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("decodeRecordTypeData(): start"); ///String fileUnfValue = null; @@ -2320,9 +2320,9 @@ void decodeRecordTypeData(BufferedInputStream stream) throws IOException { throw new IllegalArgumentException("stream == null!"); } if (isDataSectionCompressed){ - decodeRecordTypeDataCompressed(stream); + decodeRecordTypeDataCompressed(stream, storeWithVariableHeader); } else { - decodeRecordTypeDataUnCompressed(stream); + decodeRecordTypeDataUnCompressed(stream, storeWithVariableHeader); } /* UNF calculation was here... 
*/ @@ -2362,7 +2362,7 @@ PrintWriter createOutputWriter (BufferedInputStream stream) throws IOException { } - void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOException { + void decodeRecordTypeDataCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****"); @@ -2395,7 +2395,10 @@ void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOExcepti dbgLog.fine("printFormatTable:\n" + printFormatTable); variableFormatTypeList = new String[varQnty]; - + // write the variable header out, if instructed to do so + if (storeWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } for (int i = 0; i < varQnty; i++) { variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE.get( @@ -2947,7 +2950,7 @@ void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOExcepti } - void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOException { + void decodeRecordTypeDataUnCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****"); if (stream ==null){ @@ -3013,6 +3016,11 @@ void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOExcep ///dataTable2 = new Object[varQnty][caseQnty]; // storage of date formats to pass to UNF ///dateFormats = new String[varQnty][caseQnty]; + + // write the variable header out, if instructed to do so + if (storeWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } try { for (int i = 0; ; i++){ // case-wise loop diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java index ea3f3868f24..ef91793690e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java @@ -36,7 +36,6 @@ import org.apache.commons.lang3.StringUtils; import org.apache.poi.xssf.eventusermodel.XSSFReader; -import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.poi.xssf.model.SharedStrings; import org.apache.poi.openxml4j.opc.OPCPackage; import org.xml.sax.Attributes; @@ -81,7 +80,9 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. 
*/ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { + // @todo: implement handling of "saveWithVariableHeader" option + init(); TabularDataIngest ingesteddata = new TabularDataIngest(); @@ -118,6 +119,10 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws String[] caseRow = new String[varQnty]; String[] valueTokens; + // add the variable header here, if needed + if (storeWithVariableHeader) { + finalWriter.println(generateVariableHeader(dataTable.getDataVariables())); + } while ((line = secondPassReader.readLine()) != null) { // chop the line: @@ -549,7 +554,7 @@ public static void main(String[] args) throws Exception { BufferedInputStream xlsxInputStream = new BufferedInputStream(new FileInputStream(new File(args[0]))); - TabularDataIngest dataIngest = testReader.read(xlsxInputStream, null); + TabularDataIngest dataIngest = testReader.read(xlsxInputStream, false, null); dataTable = dataIngest.getDataTable(); diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetMetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetMetricsServiceBean.java index 0925c164bf4..0fb7e9f1e6c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetMetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetMetricsServiceBean.java @@ -3,7 +3,8 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; -import java.io.StringReader; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.math.BigDecimal; import java.util.ArrayList; import java.util.List; @@ -14,10 +15,8 @@ import jakarta.ejb.EJBException; import jakarta.ejb.Stateless; import jakarta.inject.Named; -import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonObject; -import jakarta.json.JsonReader; import jakarta.json.JsonValue; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; @@ -125,9 +124,7 @@ public List parseSushiReport(JsonObject report, Dataset dataset) List datasetMetricsDataset = new ArrayList<>(); String globalId = null; Dataset ds = null; - StringReader rdr = new StringReader(reportDataset.toString()); - JsonReader jrdr = Json.createReader(rdr); - JsonObject obj = jrdr.readObject(); + JsonObject obj = JsonUtil.getJsonObject(reportDataset.toString()); String jsonGlobalId = ""; String globalIdType = ""; if (obj.containsKey("dataset-id")) { diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 065b42e5afe..a74474efa15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.GuestbookResponse; import edu.harvard.iq.dataverse.Metric; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil.MetricType; @@ -50,7 +51,7 @@ public class MetricsServiceBean implements Serializable { /** Dataverses */ - + public JsonArray getDataversesTimeSeries(UriInfo uriInfo, Dataverse d) { Query query = em.createNativeQuery("" + "select distinct to_char(date_trunc('month', 
dvobject.publicationdate),'YYYY-MM') as month, count(date_trunc('month', dvobject.publicationdate))\n" @@ -63,7 +64,7 @@ public JsonArray getDataversesTimeSeries(UriInfo uriInfo, Dataverse d) { List results = query.getResultList(); return MetricsUtil.timeSeriesToJson(results); } - + /** * @param yyyymm Month in YYYY-MM format. * @param d @@ -128,9 +129,9 @@ public List dataversesBySubject(Dataverse d) { /** Datasets */ - + public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dataverse d) { - Query query = em.createNativeQuery( + Query query = em.createNativeQuery( "select distinct date, count(dataset_id)\n" + "from (\n" + "select min(to_char(COALESCE(releasetime, createtime), 'YYYY-MM')) as date, dataset_id\n" @@ -148,8 +149,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat List results = query.getResultList(); return MetricsUtil.timeSeriesToJson(results); } - - + + /** * @param yyyymm Month in YYYY-MM format. * @param d @@ -179,10 +180,10 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { // But do not use this notation if you need the values returned to // meaningfully identify the datasets! - + Query query = em.createNativeQuery( - - + + "select count(*)\n" + "from (\n" + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" @@ -311,7 +312,7 @@ public JsonArray filesTimeSeries(Dataverse d) { return MetricsUtil.timeSeriesToJson(results); } - + /** * @param yyyymm Month in YYYY-MM format. * @param d @@ -388,7 +389,7 @@ public JsonArray filesByType(Dataverse d) { return jab.build(); } - + public JsonArray filesByTypeTimeSeries(Dataverse d, boolean published) { Query query = em.createNativeQuery("SELECT DISTINCT to_char(" + (published ? "ob.publicationdate" : "ob.createdate") + ",'YYYY-MM') as date, df.contenttype, count(df.id), coalesce(sum(df.filesize),0) " + " FROM DataFile df, DvObject ob" @@ -401,13 +402,13 @@ public JsonArray filesByTypeTimeSeries(Dataverse d, boolean published) { logger.log(Level.FINE, "Metric query: {0}", query); List results = query.getResultList(); return MetricsUtil.timeSeriesByTypeToJson(results); - + } - /** Downloads + /** Downloads * @param d * @throws ParseException */ - + public JsonArray downloadsTimeSeries(Dataverse d) { // ToDo - published only? Query earlyDateQuery = em.createNativeQuery("" @@ -424,17 +425,18 @@ public JsonArray downloadsTimeSeries(Dataverse d) { + "select distinct COALESCE(to_char(responsetime, 'YYYY-MM'),'" + earliest + "') as date, count(id)\n" + "from guestbookresponse\n" + ((d == null) ? "" : "where dataset_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ")") + + ((d == null) ? "where ":" and ") + "eventtype!='" + GuestbookResponse.ACCESS_REQUEST +"'\n" + " group by COALESCE(to_char(responsetime, 'YYYY-MM'),'" + earliest + "') order by COALESCE(to_char(responsetime, 'YYYY-MM'),'" + earliest + "');"); logger.log(Level.FINE, "Metric query: {0}", query); List results = query.getResultList(); return MetricsUtil.timeSeriesToJson(results); } - + /* * This includes getting historic download without a timestamp if query * is earlier than earliest timestamped record - * + * * @param yyyymm Month in YYYY-MM format. 
*/ public long downloadsToMonth(String yyyymm, Dataverse d) throws ParseException { @@ -456,7 +458,8 @@ public long downloadsToMonth(String yyyymm, Dataverse d) throws ParseException { + "from guestbookresponse\n" + "where (date_trunc('month', responsetime) <= to_date('" + yyyymm + "','YYYY-MM')" + "or responsetime is NULL)\n" // includes historic guestbook records without date - + ((d==null) ? ";": "AND dataset_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ");") + + "and eventtype!='" + GuestbookResponse.ACCESS_REQUEST +"'\n" + + ((d==null) ? ";": "AND dataset_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ");") ); logger.log(Level.FINE, "Metric query: {0}", query); return (long) query.getSingleResult(); @@ -477,18 +480,20 @@ public long downloadsPastDays(int days, Dataverse d) { + "select count(id)\n" + "from guestbookresponse\n" + "where responsetime > current_date - interval '" + days + "' day\n" + + "and eventtype!='" + GuestbookResponse.ACCESS_REQUEST +"'\n" + ((d==null) ? ";": "AND dataset_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ");") ); logger.log(Level.FINE, "Metric query: {0}", query); return (long) query.getSingleResult(); } - + public JsonArray fileDownloadsTimeSeries(Dataverse d, boolean uniqueCounts) { Query query = em.createNativeQuery("select distinct to_char(gb.responsetime, 'YYYY-MM') as date, ob.id, ob.protocol || ':' || ob.authority || '/' || ob.identifier as pid, count(" + (uniqueCounts ? "distinct email" : "*") + ") " + " FROM guestbookresponse gb, DvObject ob" + " where ob.id = gb.datafile_id " + ((d == null) ? "" : " and ob.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ")\n") + + "and eventtype!='" + GuestbookResponse.ACCESS_REQUEST +"'\n" + "group by gb.datafile_id, ob.id, ob.protocol, ob.authority, ob.identifier, to_char(gb.responsetime, 'YYYY-MM') order by to_char(gb.responsetime, 'YYYY-MM');"); logger.log(Level.FINE, "Metric query: {0}", query); @@ -496,13 +501,14 @@ public JsonArray fileDownloadsTimeSeries(Dataverse d, boolean uniqueCounts) { return MetricsUtil.timeSeriesByIDAndPIDToJson(results); } - + public JsonArray fileDownloads(String yyyymm, Dataverse d, boolean uniqueCounts) { Query query = em.createNativeQuery("select ob.id, ob.protocol || ':' || ob.authority || '/' || ob.identifier as pid, count(" + (uniqueCounts ? "distinct email" : "*") + ") " + " FROM guestbookresponse gb, DvObject ob" + " where ob.id = gb.datafile_id " + ((d == null) ? "" : " and ob.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ")\n") + " and date_trunc('month', gb.responsetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + + "and eventtype!='" + GuestbookResponse.ACCESS_REQUEST +"'\n" + "group by gb.datafile_id, ob.id, ob.protocol, ob.authority, ob.identifier order by count desc;"); logger.log(Level.FINE, "Metric query: {0}", query); @@ -529,6 +535,7 @@ public JsonArray uniqueDownloadsTimeSeries(Dataverse d) { + " FROM guestbookresponse gb, DvObject ob" + " where ob.id = gb.dataset_id " + ((d == null) ? 
"" : " and ob.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + + "and eventtype!='" + GuestbookResponse.ACCESS_REQUEST +"'\n" + "group by gb.dataset_id, ob.protocol, ob.authority, ob.identifier, to_char(gb.responsetime, 'YYYY-MM') order by to_char(gb.responsetime, 'YYYY-MM');"); logger.log(Level.FINE, "Metric query: {0}", query); @@ -536,7 +543,7 @@ public JsonArray uniqueDownloadsTimeSeries(Dataverse d) { return MetricsUtil.timeSeriesByPIDToJson(results); } - + public JsonArray uniqueDatasetDownloads(String yyyymm, Dataverse d) { //select distinct count(distinct email),dataset_id, date_trunc('month', responsetime) from guestbookresponse group by dataset_id, date_trunc('month',responsetime) order by dataset_id,date_trunc('month',responsetime); @@ -546,6 +553,7 @@ public JsonArray uniqueDatasetDownloads(String yyyymm, Dataverse d) { + " where ob.id = gb.dataset_id " + ((d == null) ? "" : " and ob.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + " and date_trunc('month', responsetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + + "and eventtype!='" + GuestbookResponse.ACCESS_REQUEST +"'\n" + "group by gb.dataset_id, ob.protocol, ob.authority, ob.identifier order by count(distinct email) desc;"); JsonArrayBuilder jab = Json.createArrayBuilder(); try { @@ -563,10 +571,58 @@ public JsonArray uniqueDatasetDownloads(String yyyymm, Dataverse d) { return jab.build(); } - - //MDC - - + + //Accounts + + /* + * + * @param yyyymm Month in YYYY-MM format. + */ + public long accountsToMonth(String yyyymm) throws ParseException { + Query query = em.createNativeQuery("" + + "select count(authenticateduser.id)\n" + + "from authenticateduser\n" + + "where authenticateduser.createdtime is not null\n" + + "and date_trunc('month', createdtime) <= to_date('" + yyyymm + "','YYYY-MM');" + ); + logger.log(Level.FINE, "Metric query: {0}", query); + + return (long) query.getSingleResult(); + } + + /* + * + * @param days interval since the current date to list + * the number of user accounts created + */ + public long accountsPastDays(int days) { + Query query = em.createNativeQuery("" + + "select count(id)\n" + + "from authenticateduser\n" + + "where authenticateduser.createdtime is not null\n" + + "and authenticateduser.createdtime > current_date - interval '" + days + "' day;" + ); + logger.log(Level.FINE, "Metric query: {0}", query); + + return (long) query.getSingleResult(); + } + + public JsonArray accountsTimeSeries() { + Query query = em.createNativeQuery("" + + "select distinct to_char(au.createdtime, 'YYYY-MM'), count(id)\n" + + "from authenticateduser as au\n" + + "where au.createdtime is not null\n" + + "group by to_char(au.createdtime, 'YYYY-MM')\n" + + "order by to_char(au.createdtime, 'YYYY-MM');"); + + logger.log(Level.FINE, "Metric query: {0}", query); + List results = query.getResultList(); + return MetricsUtil.timeSeriesToJson(results); + } + + //MDC + + public JsonArray mdcMetricTimeSeries(MetricType metricType, String country, Dataverse d) { Query query = em.createNativeQuery("SELECT distinct substring(monthyear from 1 for 7) as date, coalesce(sum(" + metricType.toString() + "),0) as count FROM DatasetMetrics\n" + ((d == null) ? 
"" : "WHERE dataset_id in ( " + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ")\n") @@ -738,7 +794,7 @@ public Metric getMetric(String name, String dataLocation, String dayString, Data // https://github.com/DANS-KNAW/dataverse/blob/dans-develop/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsDansServiceBean.java /** - * + * * @param dvId - parent dataverse id * @param dtype - type of object to return 'Dataverse' or 'Dataset' * @return - list of objects of specified type included in the subtree (includes parent dataverse if dtype is 'Dataverse') @@ -760,7 +816,7 @@ private String getCommaSeparatedIdStringForSubtree(Dataverse d, String dtype) { } private List getChildrenIdsRecursively(Long dvId, String dtype, DatasetVersion.VersionState versionState) { - + //Intended to be called only with dvId != null String sql = "WITH RECURSIVE querytree AS (\n" + " SELECT id, dtype, owner_id, publicationdate\n" diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java index e9898031343..0a64f42d840 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java @@ -39,7 +39,6 @@ import jakarta.ws.rs.Path; import jakarta.ws.rs.Produces; import jakarta.ws.rs.QueryParam; -import jakarta.ws.rs.DefaultValue; import jakarta.ws.rs.container.ContainerRequestContext; import jakarta.ws.rs.core.Context; @@ -226,7 +225,12 @@ private SolrQueryResponse getTotalCountsFromSolr(DataverseRequest dataverseReque //SearchFields.RELEASE_OR_CREATE_DATE, SortBy.DESCENDING, 0, //paginationStart, true, // dataRelatedToMe - SearchConstants.NUM_SOLR_DOCS_TO_RETRIEVE //10 // SearchFields.NUM_SOLR_DOCS_TO_RETRIEVE + SearchConstants.NUM_SOLR_DOCS_TO_RETRIEVE, //10 // SearchFields.NUM_SOLR_DOCS_TO_RETRIEVE + true, + null, + null, + false, // no need to request facets here ... + false // ... 
same for highlights ); } catch (SearchException ex) { logger.severe("Search for total counts failed with filter query"); diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java index 917884f3549..4bd9ce2e00d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java @@ -239,7 +239,7 @@ private List getSolrFilterQueries(boolean totalCountsOnly){ //fq=publicationStatus:"Unpublished"&fq=publicationStatus:"Draft" // ----------------------------------------------------------------- - // (4) FQ by dataset metadata vlidity + // (4) FQ by dataset metadata validity // ----------------------------------------------------------------- filterQueries.add(this.filterParams.getSolrFragmentForDatasetValidity()); //fq=datasetValid:(true OR false) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java new file mode 100644 index 00000000000..a3dcf6cbb3b --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java @@ -0,0 +1,550 @@ +package edu.harvard.iq.dataverse.pidproviders; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.util.SystemConfig; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; + +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.lang3.RandomStringUtils; +import com.beust.jcommander.Strings; + +public abstract class AbstractPidProvider implements PidProvider { + + private static final Logger logger = Logger.getLogger(AbstractPidProvider.class.getCanonicalName()); + + public static String UNAVAILABLE = ":unav"; + public static final String SEPARATOR = "/"; + + protected PidProviderFactoryBean pidProviderService; + + private String protocol; + + private String authority = null; + + private String shoulder = null; + + private String identifierGenerationStyle = null; + + private String datafilePidFormat = null; + + private HashSet managedSet; + + private HashSet excludedSet; + + private String id; + private String label; + + protected AbstractPidProvider(String id, String label, String protocol) { + this.id = id; + this.label = label; + this.protocol = protocol; + this.managedSet = new HashSet(); + this.excludedSet = new HashSet(); + } + + protected AbstractPidProvider(String id, String label, String protocol, String authority, String shoulder, + String identifierGenerationStyle, String datafilePidFormat, String managedList, String excludedList) { + this.id = id; + this.label = label; + this.protocol = protocol; + this.authority = authority; + this.shoulder = shoulder; + this.identifierGenerationStyle = identifierGenerationStyle; + this.datafilePidFormat = datafilePidFormat; + this.managedSet = new HashSet(Arrays.asList(managedList.split(",\\s"))); + this.excludedSet = new HashSet(Arrays.asList(excludedList.split(",\\s"))); + if (logger.isLoggable(Level.FINE)) { + Iterator iter = managedSet.iterator(); + while (iter.hasNext()) { + logger.fine("managedSet in " + getId() + ": " + iter.next()); + } + iter = excludedSet.iterator(); + while (iter.hasNext()) { + 
logger.fine("excludedSet in " + getId() + ": " + iter.next()); + } + } + } + + @Override + public Map getMetadataForCreateIndicator(DvObject dvObjectIn) { + logger.log(Level.FINE, "getMetadataForCreateIndicator(DvObject)"); + Map metadata = new HashMap<>(); + metadata = addBasicMetadata(dvObjectIn, metadata); + metadata.put("datacite.publicationyear", generateYear(dvObjectIn)); + metadata.put("_target", getTargetUrl(dvObjectIn)); + return metadata; + } + + protected Map getUpdateMetadata(DvObject dvObjectIn) { + logger.log(Level.FINE, "getUpdateMetadataFromDataset"); + Map metadata = new HashMap<>(); + metadata = addBasicMetadata(dvObjectIn, metadata); + return metadata; + } + + protected Map addBasicMetadata(DvObject dvObjectIn, Map metadata) { + + String authorString = dvObjectIn.getAuthorString(); + if (authorString.isEmpty() || authorString.contains(DatasetField.NA_VALUE)) { + authorString = UNAVAILABLE; + } + + String producerString = pidProviderService.getProducer(); + + if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { + producerString = UNAVAILABLE; + } + + String titleString = dvObjectIn.getCurrentName(); + + if (titleString.isEmpty() || titleString.equals(DatasetField.NA_VALUE)) { + titleString = UNAVAILABLE; + } + + metadata.put("datacite.creator", authorString); + metadata.put("datacite.title", titleString); + metadata.put("datacite.publisher", producerString); + metadata.put("datacite.publicationyear", generateYear(dvObjectIn)); + return metadata; + } + + protected Map addDOIMetadataForDestroyedDataset(DvObject dvObjectIn) { + Map metadata = new HashMap<>(); + String authorString = UNAVAILABLE; + String producerString = UNAVAILABLE; + String titleString = "This item has been removed from publication"; + + metadata.put("datacite.creator", authorString); + metadata.put("datacite.title", titleString); + metadata.put("datacite.publisher", producerString); + metadata.put("datacite.publicationyear", "9999"); + return metadata; + } + + protected String getTargetUrl(DvObject dvObjectIn) { + logger.log(Level.FINE, "getTargetUrl"); + return SystemConfig.getDataverseSiteUrlStatic() + dvObjectIn.getTargetUrl() + + dvObjectIn.getGlobalId().asString(); + } + + @Override + public String getIdentifier(DvObject dvObject) { + GlobalId gid = dvObject.getGlobalId(); + return gid != null ? gid.asString() : null; + } + + protected String generateYear(DvObject dvObjectIn) { + return dvObjectIn.getYearPublishedCreated(); + } + + public Map getMetadataForTargetURL(DvObject dvObject) { + logger.log(Level.FINE, "getMetadataForTargetURL"); + HashMap metadata = new HashMap<>(); + metadata.put("_target", getTargetUrl(dvObject)); + return metadata; + } + + @Override + public boolean alreadyRegistered(DvObject dvo) throws Exception { + if (dvo == null) { + logger.severe("Null DvObject sent to alreadyRegistered()."); + return false; + } + GlobalId globalId = dvo.getGlobalId(); + if (globalId == null) { + return false; + } + return alreadyRegistered(globalId, false); + } + + public abstract boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) throws Exception; + + /* + * ToDo: the DvObject being sent in provides partial support for the case where + * it has a different authority/protocol than what is configured (i.e. a legacy + * Pid that can actually be updated by the Pid account being used.) Removing + * this now would potentially break/make it harder to handle that case prior to + * support for configuring multiple Pid providers. 
Once that exists, it would be + * cleaner to always find the PidProvider associated with the + * protocol/authority/shoulder of the current dataset and then not pass the + * DvObject as a param. (This would also remove calls to get the settings since + * that would be done at construction.) + */ + @Override + public DvObject generatePid(DvObject dvObject) { + + if (dvObject.getProtocol() == null) { + dvObject.setProtocol(getProtocol()); + } else { + if (!dvObject.getProtocol().equals(getProtocol())) { + logger.warning("The protocol of the DvObject (" + dvObject.getProtocol() + + ") does not match the configured protocol (" + getProtocol() + ")"); + throw new IllegalArgumentException("The protocol of the DvObject (" + dvObject.getProtocol() + + ") doesn't match that of the provider, id: " + getId()); + } + } + if (dvObject.getAuthority() == null) { + dvObject.setAuthority(getAuthority()); + } else { + if (!dvObject.getAuthority().equals(getAuthority())) { + logger.warning("The authority of the DvObject (" + dvObject.getAuthority() + + ") does not match the configured authority (" + getAuthority() + ")"); + throw new IllegalArgumentException("The authority of the DvObject (" + dvObject.getAuthority() + + ") doesn't match that of the provider, id: " + getId()); + } + } + if (dvObject.isInstanceofDataset()) { + dvObject.setIdentifier(generateDatasetIdentifier((Dataset) dvObject)); + } else { + dvObject.setIdentifier(generateDataFileIdentifier((DataFile) dvObject)); + } + return dvObject; + } + + private String generateDatasetIdentifier(Dataset dataset) { + String shoulder = getShoulder(); + + switch (getIdentifierGenerationStyle()) { + case "randomString": + return generateIdentifierAsRandomString(dataset, shoulder); + case "storedProcGenerated": + return generateIdentifierFromStoredProcedureIndependent(dataset, shoulder); + default: + /* Should we throw an exception instead?? -- L.A. 4.6.2 */ + return generateIdentifierAsRandomString(dataset, shoulder); + } + } + + /** + * Check that an identifier entered by the user is unique (not currently used for + * any other study in this Dataverse Network); also checks for a duplicate in EZID + * if needed. + * + * @param globalId the global id whose uniqueness is to be checked. + * @return {@code true} if the identifier is unique, {@code false} otherwise. + */ + public boolean isGlobalIdUnique(GlobalId globalId) { + if (!pidProviderService.isGlobalIdLocallyUnique(globalId)) { + return false; // duplication found in local database + } + + // not in local DB, look in the persistent identifier service + try { + return !alreadyRegistered(globalId, false); + } catch (Exception e) { + // we can live with failure - means identifier not found remotely + } + + return true; + } + + /** + * Parse a Persistent Id and set the protocol, authority, and identifier + * + * Example 1: doi:10.5072/FK2/BYM3IW protocol: doi authority: 10.5072 + * identifier: FK2/BYM3IW + * + * Example 2: hdl:1902.1/111012 protocol: hdl authority: 1902.1 identifier: + * 111012 + * + * @param fullIdentifierString the full persistent identifier string, including + * the protocol prefix. + * @return a {@code GlobalId} containing the parsed protocol, authority, and identifier, or + * {@code null} if parsing failed.
+ */ + @Override + public GlobalId parsePersistentId(String fullIdentifierString) { + // Occasionally, the protocol separator character ':' comes in still + // URL-encoded as %3A (usually as a result of the URL having been + // encoded twice): + fullIdentifierString = fullIdentifierString.replace("%3A", ":"); + + int index1 = fullIdentifierString.indexOf(':'); + if (index1 > 0) { // ':' found with one or more characters before it + String protocol = fullIdentifierString.substring(0, index1); + GlobalId globalId = parsePersistentId(protocol, fullIdentifierString.substring(index1 + 1)); + return globalId; + } + logger.log(Level.INFO, "Error parsing identifier: {0}: '':'' not found in string", + fullIdentifierString); + return null; + } + + protected GlobalId parsePersistentId(String protocol, String identifierString) { + String authority; + String identifier; + if (identifierString == null) { + return null; + } + int index = identifierString.indexOf(getSeparator()); + if (index > 0 && (index + 1) < identifierString.length()) { + // '/' found with one or more characters + // before and after it + // Strip any whitespace, ; and ' from authority (should finding them cause a + // failure instead?) + authority = PidProvider.formatIdentifierString(identifierString.substring(0, index)); + + if (PidProvider.testforNullTerminator(authority)) { + return null; + } + identifier = PidProvider.formatIdentifierString(identifierString.substring(index + 1)); + if (PidProvider.testforNullTerminator(identifier)) { + return null; + } + + } else { + logger.log(Level.INFO, "Error parsing identifier: {0}: '':/'' not found in string", + identifierString); + return null; + } + return parsePersistentId(protocol, authority, identifier); + } + + public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + logger.fine("Parsing: " + protocol + ":" + authority + getSeparator() + identifier + " in " + getId()); + if (!PidProvider.isValidGlobalId(protocol, authority, identifier)) { + return null; + } + // Check authority/identifier if this is a provider that manages specific + // identifiers + // /is not one of the unmanaged providers that has null authority + if (getAuthority() != null) { + + String cleanIdentifier = protocol + ":" + authority + getSeparator() + identifier; + /* + * Test if this provider manages this identifier - return null if it does not. + * It does match if ((the identifier's authority and shoulder match the + * provider's), or the identifier is in the managed set), and, in either case, + * the identifier is not in the excluded set. 
+ */ + logger.fine("clean pid in " + getId() + ": " + cleanIdentifier); + logger.fine("managed in " + getId() + ": " + getManagedSet().contains(cleanIdentifier)); + logger.fine("excluded from " + getId() + ": " + getExcludedSet().contains(cleanIdentifier)); + + if (!(((authority.equals(getAuthority()) && identifier.startsWith(getShoulder())) + || getManagedSet().contains(cleanIdentifier)) && !getExcludedSet().contains(cleanIdentifier))) { + return null; + } + } + return new GlobalId(protocol, authority, identifier, getSeparator(), getUrlPrefix(), getId()); + } + + public String getSeparator() { + // The standard default + return SEPARATOR; + } + + private String generateDataFileIdentifier(DataFile datafile) { + String doiDataFileFormat = getDatafilePidFormat(); + + String prepend = ""; + if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.DEPENDENT.toString())) { + // If format is dependent then pre-pend the dataset identifier + prepend = datafile.getOwner().getIdentifier() + SEPARATOR; + datafile.setProtocol(datafile.getOwner().getProtocol()); + datafile.setAuthority(datafile.getOwner().getAuthority()); + } else { + // If there's a shoulder prepend independent identifiers with it + prepend = getShoulder(); + datafile.setProtocol(getProtocol()); + datafile.setAuthority(getAuthority()); + } + + switch (getIdentifierGenerationStyle()) { + case "randomString": + return generateIdentifierAsRandomString(datafile, prepend); + case "storedProcGenerated": + if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())) { + return generateIdentifierFromStoredProcedureIndependent(datafile, prepend); + } else { + return generateIdentifierFromStoredProcedureDependent(datafile, prepend); + } + default: + /* Should we throw an exception instead?? -- L.A. 4.6.2 */ + return generateIdentifierAsRandomString(datafile, prepend); + } + } + + /* + * This method checks locally for a DvObject with the same PID and if that is + * OK, checks with the PID service. + * + * @param dvo - the object to check (ToDo - get protocol/authority from this + * PidProvider object) + * + * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it + * could be the shoulder or the parent Dataset identifier + */ + private String generateIdentifierAsRandomString(DvObject dvo, String prepend) { + String identifier = null; + do { + identifier = prepend + RandomStringUtils.randomAlphanumeric(6).toUpperCase(); + } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), + this.getUrlPrefix(), this.getId()))); + + return identifier; + } + + /* + * This method checks locally for a DvObject with the same PID and if that is + * OK, checks with the PID service. + * + * @param dvo - the object to check (ToDo - get protocol/authority from this + * PidProvider object) + * + * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it + * could be the shoulder or the parent Dataset identifier + */ + + private String generateIdentifierFromStoredProcedureIndependent(DvObject dvo, String prepend) { + String identifier; + do { + String identifierFromStoredProcedure = pidProviderService.generateNewIdentifierByStoredProcedure(); + // some diagnostics here maybe - is it possible to determine that it's failing + // because the stored procedure hasn't been created in the database? 
+ if (identifierFromStoredProcedure == null) { + return null; + } + identifier = prepend + identifierFromStoredProcedure; + } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), + this.getUrlPrefix(), this.getId()))); + + return identifier; + } + + /* + * This method is only used for DataFiles with DEPENDENT Pids. It is not for + * Datasets + * + */ + private String generateIdentifierFromStoredProcedureDependent(DataFile datafile, String prepend) { + String identifier; + Long retVal; + retVal = Long.valueOf(0L); + // ToDo - replace loops with one lookup for largest entry? (the do loop runs + // ~n**2/2 calls). The check for existingIdentifiers means this is mostly a + // local loop now, versus involving db or PidProvider calls, but still...) + + // This will catch identifiers already assigned in the current transaction (e.g. + // in FinalizeDatasetPublicationCommand) that haven't been committed to the db + // without having to make a call to the PIDProvider + Set existingIdentifiers = new HashSet(); + List files = datafile.getOwner().getFiles(); + for (DataFile f : files) { + existingIdentifiers.add(f.getIdentifier()); + } + + do { + retVal++; + identifier = prepend + retVal.toString(); + + } while (existingIdentifiers.contains(identifier) || !isGlobalIdUnique(new GlobalId(datafile.getProtocol(), + datafile.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getId()))); + + return identifier; + } + + + @Override + public boolean canManagePID() { + // The default expectation is that PID providers are configured to manage some + // set (i.e. based on protocol/authority/shoulder) of PIDs + return true; + } + + @Override + public void setPidProviderServiceBean(PidProviderFactoryBean pidProviderServiceBean) { + this.pidProviderService = pidProviderServiceBean; + } + + @Override + public String getProtocol() { + return protocol; + } + + @Override + public String getAuthority() { + return authority; + } + + @Override + public String getShoulder() { + return shoulder; + } + + @Override + public String getIdentifierGenerationStyle() { + return identifierGenerationStyle; + } + + @Override + public String getDatafilePidFormat() { + return datafilePidFormat; + } + + @Override + public Set getManagedSet() { + return managedSet; + } + + @Override + public Set getExcludedSet() { + return excludedSet; + } + + @Override + public String getId() { + return id; + } + + @Override + public String getLabel() { + return label; + } + + @Override + /** + * True if this provider can manage PIDs in general, this pid is not in the + * managedSet (meaning it is managed but the provider does not generally manage + * it's protocol/authority/separator/shoulder) and either this provider is the + * same as the pid's or we're allowed to create INDEPENDENT pids. The latter + * clause covers the potential case where the effective pid provider/generator + * for the dataset is set to a different one that handles the dataset's pid + * itself. In this case, we can create file PIDs if they are independent. 
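+ * For example, a provider whose id matches the pid's provider id can normally create datafile PIDs for that dataset, whereas a provider that merely lists the pid in its managed set cannot.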
+ * + * @param pid - the related pid to check + * @return true if this provider can manage PIDs like the one supplied + */ + public boolean canCreatePidsLike(GlobalId pid) { + return canManagePID() && !managedSet.contains(pid.asString()) + && (getIdentifierGenerationStyle().equals("INDEPENDENT") || getId().equals(pid.getProviderId())); + } + + @Override + public JsonObject getProviderSpecification() { + JsonObjectBuilder providerSpecification = Json.createObjectBuilder(); + providerSpecification.add("id", id); + providerSpecification.add("label", label); + providerSpecification.add("protocol", protocol); + providerSpecification.add("authority", authority); + providerSpecification.add("separator", getSeparator()); + providerSpecification.add("shoulder", shoulder); + providerSpecification.add("identifierGenerationStyle", identifierGenerationStyle); + providerSpecification.add("datafilePidFormat", datafilePidFormat); + providerSpecification.add("managedSet", Strings.join(",", managedSet.toArray())); + providerSpecification.add("excludedSet", Strings.join(",", excludedSet.toArray())); + return providerSpecification.build(); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PermaLinkPidProviderServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PermaLinkPidProviderServiceBean.java deleted file mode 100644 index d145a7ec106..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PermaLinkPidProviderServiceBean.java +++ /dev/null @@ -1,160 +0,0 @@ -package edu.harvard.iq.dataverse.pidproviders; - -import edu.harvard.iq.dataverse.AbstractGlobalIdServiceBean; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; -import edu.harvard.iq.dataverse.util.SystemConfig; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.logging.Logger; - -import jakarta.annotation.PostConstruct; -import jakarta.ejb.Stateless; - -/** - * PermaLink provider - * This is a minimalist permanent ID provider intended for use with 'real' datasets/files where the use case none-the-less doesn't lend itself to the use of DOIs or Handles, e.g. - * * due to cost - * * for a catalog/archive where Dataverse has a dataset representing a dataset with DOI/handle stored elsewhere - * - * The initial implementation will mint identifiers locally and will provide the existing page URLs (using the ?persistentID= format). - * This will be overridable by a configurable parameter to support use of an external resolver. 
- * - */ -@Stateless -public class PermaLinkPidProviderServiceBean extends AbstractGlobalIdServiceBean { - - private static final Logger logger = Logger.getLogger(PermaLinkPidProviderServiceBean.class.getCanonicalName()); - - public static final String PERMA_PROTOCOL = "perma"; - public static final String PERMA_PROVIDER_NAME = "PERMA"; - - //ToDo - handle dataset/file defaults for local system - public static final String PERMA_RESOLVER_URL = JvmSettings.PERMALINK_BASEURL - .lookupOptional() - .orElse(SystemConfig.getDataverseSiteUrlStatic()); - - String authority = null; - private String separator = ""; - - @PostConstruct - private void init() { - if(PERMA_PROTOCOL.equals(settingsService.getValueForKey(Key.Protocol))){ - authority = settingsService.getValueForKey(Key.Authority); - configured=true; - }; - - } - - - //Only used in PidUtilTest - haven't figured out how to mock a PostConstruct call directly - // ToDo - remove after work to allow more than one Pid Provider which is expected to not use stateless beans - public void reInit() { - init(); - } - - @Override - public String getSeparator() { - //The perma default - return separator; - } - - @Override - public boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) { - // Perma doesn't manage registration, so we assume all local PIDs can be treated - // as registered - boolean existsLocally = !dvObjectService.isGlobalIdLocallyUnique(globalId); - return existsLocally ? existsLocally : noProviderDefault; - } - - @Override - public boolean registerWhenPublished() { - return false; - } - - @Override - public List getProviderInformation() { - return List.of(PERMA_PROVIDER_NAME, PERMA_RESOLVER_URL); - } - - @Override - public String createIdentifier(DvObject dvo) throws Throwable { - //Call external resolver and send landing URL? - //FWIW: Return value appears to only be used in RegisterDvObjectCommand where success requires finding the dvo identifier in this string. (Also logged a couple places). - return(dvo.getGlobalId().asString()); - } - - @Override - public Map getIdentifierMetadata(DvObject dvo) { - Map map = new HashMap<>(); - return map; - } - - @Override - public String modifyIdentifierTargetURL(DvObject dvo) throws Exception { - return getTargetUrl(dvo); - } - - @Override - public void deleteIdentifier(DvObject dvo) throws Exception { - // no-op - } - - @Override - public boolean publicizeIdentifier(DvObject dvObject) { - //Generate if needed (i.e. datafile case where we don't create/register early (even with reigsterWhenPublished == false)) - if(dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty() ){ - dvObject = generateIdentifier(dvObject); - } - //Call external resolver and send landing URL? 
- return true; - } - - @Override - public GlobalId parsePersistentId(String pidString) { - //ToDo - handle local PID resolver for dataset/file - if (pidString.startsWith(getUrlPrefix())) { - pidString = pidString.replace(getUrlPrefix(), - (PERMA_PROTOCOL + ":")); - } - return super.parsePersistentId(pidString); - } - - @Override - public GlobalId parsePersistentId(String protocol, String identifierString) { - logger.fine("Checking Perma: " + identifierString); - if (!PERMA_PROTOCOL.equals(protocol)) { - return null; - } - String identifier = null; - if (authority != null) { - if (identifierString.startsWith(authority)) { - identifier = identifierString.substring(authority.length()); - } - } - identifier = GlobalIdServiceBean.formatIdentifierString(identifier); - if (GlobalIdServiceBean.testforNullTerminator(identifier)) { - return null; - } - return new GlobalId(PERMA_PROTOCOL, authority, identifier, separator, getUrlPrefix(), PERMA_PROVIDER_NAME); - } - - @Override - public GlobalId parsePersistentId(String protocol, String authority, String identifier) { - if (!PERMA_PROTOCOL.equals(protocol)) { - return null; - } - return super.parsePersistentId(protocol, authority, identifier); - } - - @Override - public String getUrlPrefix() { - - return PERMA_RESOLVER_URL + "/citation?persistentId=" + PERMA_PROTOCOL + ":"; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidHelper.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidHelper.java deleted file mode 100644 index 5bc855a9593..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidHelper.java +++ /dev/null @@ -1,43 +0,0 @@ -package edu.harvard.iq.dataverse.pidproviders; - -import java.util.Arrays; -import jakarta.annotation.PostConstruct; -import jakarta.ejb.EJB; -import jakarta.ejb.Singleton; -import jakarta.ejb.Startup; - -import edu.harvard.iq.dataverse.DOIDataCiteServiceBean; -import edu.harvard.iq.dataverse.DOIEZIdServiceBean; -import edu.harvard.iq.dataverse.HandlenetServiceBean; - - /** - * This is a small helper bean - * As it is a singleton and built at application start (=deployment), it will inject the (stateless) - * dataverse service into the BrandingUtil once it's ready. 
- */ - @Startup - @Singleton - public class PidHelper { - - @EJB - DOIDataCiteServiceBean datacitePidSvc; - @EJB - DOIEZIdServiceBean ezidPidSvc; - @EJB - HandlenetServiceBean handlePidSvc; - @EJB - FakePidProviderServiceBean fakePidSvc; - @EJB - PermaLinkPidProviderServiceBean permaPidSvc; - @EJB - UnmanagedDOIServiceBean unmanagedDOISvc; - @EJB - UnmanagedHandlenetServiceBean unmanagedHandleSvc; - - @PostConstruct - public void listServices() { - PidUtil.addAllToProviderList(Arrays.asList(datacitePidSvc, ezidPidSvc, handlePidSvc, permaPidSvc, fakePidSvc)); - PidUtil.addAllToUnmanagedProviderList(Arrays.asList(unmanagedDOISvc, unmanagedHandleSvc)); - } - - } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProvider.java similarity index 56% rename from src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProvider.java index aebf13778c3..ea3a243f25c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProvider.java @@ -1,19 +1,16 @@ -package edu.harvard.iq.dataverse; +package edu.harvard.iq.dataverse.pidproviders; -import static edu.harvard.iq.dataverse.GlobalIdServiceBean.logger; -import edu.harvard.iq.dataverse.engine.command.CommandContext; -import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; -import edu.harvard.iq.dataverse.pidproviders.PidUtil; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import jakarta.json.JsonObject; +import jakarta.json.JsonValue; import java.util.*; -import java.util.function.Function; -import java.util.logging.Level; import java.util.logging.Logger; -public interface GlobalIdServiceBean { +public interface PidProvider { - static final Logger logger = Logger.getLogger(GlobalIdServiceBean.class.getCanonicalName()); + static final Logger logger = Logger.getLogger(PidProvider.class.getCanonicalName()); boolean alreadyRegistered(DvObject dvo) throws Exception; @@ -36,7 +33,6 @@ public interface GlobalIdServiceBean { boolean registerWhenPublished(); boolean canManagePID(); - boolean isConfigured(); List getProviderInformation(); @@ -52,36 +48,24 @@ public interface GlobalIdServiceBean { Map getMetadataForTargetURL(DvObject dvObject); - DvObject generateIdentifier(DvObject dvObject); + DvObject generatePid(DvObject dvObject); String getIdentifier(DvObject dvObject); boolean publicizeIdentifier(DvObject studyIn); - String generateDatasetIdentifier(Dataset dataset); - String generateDataFileIdentifier(DataFile datafile); boolean isGlobalIdUnique(GlobalId globalId); String getUrlPrefix(); String getSeparator(); - static GlobalIdServiceBean getBean(String protocol, CommandContext ctxt) { - final Function protocolHandler = BeanDispatcher.DISPATCHER.get(protocol); - if ( protocolHandler != null ) { - GlobalIdServiceBean theBean = protocolHandler.apply(ctxt); - if(theBean != null && theBean.isConfigured()) { - logger.fine("getBean returns " + theBean.getProviderInformation().get(0) + " for protocol " + protocol); - } - return theBean; - } else { - logger.log(Level.SEVERE, "Unknown protocol: {0}", protocol); - return null; - } - } - - static GlobalIdServiceBean getBean(CommandContext ctxt) { - return getBean(ctxt.settings().getValueForKey(Key.Protocol, ""), ctxt); - } + 
String getProtocol(); + String getProviderType(); + String getId(); + String getLabel(); + String getAuthority(); + String getShoulder(); + String getIdentifierGenerationStyle(); public static Optional parse(String identifierString) { try { @@ -111,6 +95,7 @@ public static Optional parse(String identifierString) { * {@code null} if parsing failed. */ public GlobalId parsePersistentId(String identifierString); + public GlobalId parsePersistentId(String protocol, String authority, String identifier); @@ -119,16 +104,16 @@ public static boolean isValidGlobalId(String protocol, String authority, String if (protocol == null || authority == null || identifier == null) { return false; } - if(!authority.equals(GlobalIdServiceBean.formatIdentifierString(authority))) { + if(!authority.equals(PidProvider.formatIdentifierString(authority))) { return false; } - if (GlobalIdServiceBean.testforNullTerminator(authority)) { + if (PidProvider.testforNullTerminator(authority)) { return false; } - if(!identifier.equals(GlobalIdServiceBean.formatIdentifierString(identifier))) { + if(!identifier.equals(PidProvider.formatIdentifierString(identifier))) { return false; } - if (GlobalIdServiceBean.testforNullTerminator(identifier)) { + if (PidProvider.testforNullTerminator(identifier)) { return false; } return true; @@ -177,40 +162,28 @@ static boolean checkDOIAuthority(String doiAuthority){ return true; } -} - - -/* - * ToDo - replace this with a mechanism like BrandingUtilHelper that would read - * the config and create PidProviders, one per set of config values and serve - * those as needed. The help has to be a bean to autostart and to hand the - * required service beans to the PidProviders. That may boil down to just the - * dvObjectService (to check for local identifier conflicts) since it will be - * the helper that has to read settings/get systewmConfig values. - * - */ - -/** - * Static utility class for dispatching implementing beans, based on protocol and providers. - * @author michael - */ -class BeanDispatcher { - static final Map> DISPATCHER = new HashMap<>(); - - static { - DISPATCHER.put("hdl", ctxt->ctxt.handleNet() ); - DISPATCHER.put("doi", ctxt->{ - String doiProvider = ctxt.settings().getValueForKey(Key.DoiProvider, ""); - switch ( doiProvider ) { - case "EZID": return ctxt.doiEZId(); - case "DataCite": return ctxt.doiDataCite(); - case "FAKE": return ctxt.fakePidProvider(); - default: - logger.log(Level.SEVERE, "Unknown doiProvider: {0}", doiProvider); - return null; - } - }); - - DISPATCHER.put(PermaLinkPidProviderServiceBean.PERMA_PROTOCOL, ctxt->ctxt.permaLinkProvider() ); - } + + public void setPidProviderServiceBean(PidProviderFactoryBean pidProviderFactoryBean); + + String getDatafilePidFormat(); + + Set getManagedSet(); + + Set getExcludedSet(); + + /** + * Whether related pids can be created by this pid provider + * @see edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider#canCreatePidsLike(GlobalId) more details in the abstract implementation + * + * @param pid + * @return - whether related pids can be created by this pid provider. + */ + boolean canCreatePidsLike(GlobalId pid); + + /** + * Returns a JSON representation of this pid provider including it's id, label, protocol, authority, separator, and identifier. 
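+ * For example, a DOI provider might return something like {"id": "datacite1", "label": "DataCite", "protocol": "doi", "authority": "10.5072", "separator": "/", "shoulder": "FK2", ...}.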
+ * @return + */ + public JsonObject getProviderSpecification(); + } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactory.java new file mode 100644 index 00000000000..f7c1a4b9174 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactory.java @@ -0,0 +1,8 @@ +package edu.harvard.iq.dataverse.pidproviders; + +public interface PidProviderFactory { + + String getType(); + + PidProvider createPidProvider(String id); +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java new file mode 100644 index 00000000000..7d6e5f57ea9 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java @@ -0,0 +1,251 @@ +package edu.harvard.iq.dataverse.pidproviders; + +import java.io.IOException; +import java.net.URL; +import java.net.URLClassLoader; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Optional; +import java.util.ServiceLoader; +import java.util.logging.Level; +import java.util.logging.Logger; + +import jakarta.annotation.PostConstruct; +import jakarta.ejb.EJB; +import jakarta.ejb.Singleton; +import jakarta.ejb.Startup; +import jakarta.inject.Inject; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.DvObjectServiceBean; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.doi.UnmanagedDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.ezid.EZIdDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.UnmanagedHandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.UnmanagedPermaLinkPidProvider; + +/** + * This Bean loads all of the PidProviderFactory types available (e.g. EZID, + * DataCite, Handle, PermaLink) and then reads the configuration to load + * particular PidProviders (e.g. a DataCite provider with a specific + * authority/shoulder, username/password, etc.) 
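+ * For example, if dataverse.pid.providers lists an id such as datacite1 and the corresponding type setting (looked up via JvmSettings.PID_PROVIDER_TYPE) resolves to a registered type (e.g. datacite), the factory for that type is asked to create a provider with id datacite1.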
+ */ +@Startup +@Singleton +public class PidProviderFactoryBean { + + private static final Logger logger = Logger.getLogger(PidProviderFactoryBean.class.getCanonicalName()); + + @Inject + DataverseServiceBean dataverseService; + @EJB + protected SettingsServiceBean settingsService; + @Inject + protected DvObjectServiceBean dvObjectService; + @Inject + SystemConfig systemConfig; + + private ServiceLoader<PidProviderFactory> loader; + private Map<String, PidProviderFactory> pidProviderFactoryMap = new HashMap<>(); + + @PostConstruct + public void init() { + loadProviderFactories(); + loadProviders(); + } + + private void loadProviderFactories() { + /* + * Step 1 - find the PROVIDERS dir and add all jar files there to a class loader + */ + List<URL> jarUrls = new ArrayList<>(); + Optional<String> providerPathSetting = JvmSettings.PIDPROVIDERS_DIRECTORY.lookupOptional(String.class); + if (providerPathSetting.isPresent()) { + Path providersDir = Paths.get(providerPathSetting.get()); + // Get all JAR files from the configured directory + try (DirectoryStream<Path> stream = Files.newDirectoryStream(providersDir, "*.jar")) { + // Using the foreach loop here to enable catching the URI/URL exceptions + for (Path path : stream) { + logger.log(Level.FINE, "Adding {0}", path.toUri().toURL()); + // This is the syntax required to indicate a jar file from which classes should + // be loaded (versus a class file). + jarUrls.add(new URL("jar:" + path.toUri().toURL() + "!/")); + } + } catch (IOException e) { + logger.warning("Problem accessing external Providers: " + e.getLocalizedMessage()); + } + } + URLClassLoader cl = URLClassLoader.newInstance(jarUrls.toArray(new URL[0]), this.getClass().getClassLoader()); + + /* + * Step 2 - load all PidProviderFactories that can be found, using the jars as + * additional sources + */ + loader = ServiceLoader.load(PidProviderFactory.class, cl); + /* + * Step 3 - Fill pidProviderFactoryMap with type as the key, allow external + * factories to replace internal ones for the same type. FWIW: From the logging + * it appears that ServiceLoader returns classes in ~ alphabetical order rather + * than by class loader, so internal classes handling a given providerName may + * be processed before or after external ones. + */ + loader.forEach(providerFactory -> { + String type = providerFactory.getType(); + logger.fine("Loaded PidProviderFactory of type: " + type); + // If no entry for this providerName yet or if it is an external provider + if (!pidProviderFactoryMap.containsKey(type) || providerFactory.getClass().getClassLoader().equals(cl)) { + logger.fine("Adding PidProviderFactory of type: " + type + " to the map"); + pidProviderFactoryMap.put(type, providerFactory); + } + logger.log(Level.FINE, + "Loaded PidProviderFactory of type: " + type + " from " + + providerFactory.getClass().getCanonicalName() + " and classloader: " + + providerFactory.getClass().getClassLoader().getClass().getCanonicalName()); + }); + } + + private void loadProviders() { + Optional<String[]> providers = JvmSettings.PID_PROVIDERS.lookupOptional(String[].class); + if (!providers.isPresent()) { + logger.warning( + "No PidProviders configured via dataverse.pid.providers. 
Please consider updating as older PIDProvider configuration mechanisms will be removed in a future version of Dataverse."); + return; + } else { + for (String id : providers.get()) { + //Allows spaces in PID_PROVIDERS setting + id=id.trim(); + Optional type = JvmSettings.PID_PROVIDER_TYPE.lookupOptional(id); + if (!type.isPresent()) { + logger.warning("PidProvider " + id + + " listed in dataverse.pid.providers is not properly configured and will not be used."); + } else { + String typeString = type.get(); + if (pidProviderFactoryMap.containsKey(typeString)) { + PidProvider provider = pidProviderFactoryMap.get(typeString).createPidProvider(id); + provider.setPidProviderServiceBean(this); + PidUtil.addToProviderList(provider); + } + } + } + } + String protocol = settingsService.getValueForKey(SettingsServiceBean.Key.Protocol); + String authority = settingsService.getValueForKey(SettingsServiceBean.Key.Authority); + String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder); + String provider = settingsService.getValueForKey(SettingsServiceBean.Key.DoiProvider); + + if (protocol != null && authority != null && shoulder != null && provider != null) { + logger.warning("Found legacy settings: " + protocol + " " + authority + " " + shoulder + " " + provider + + "Please consider updating as this PIDProvider configuration mechanism will be removed in a future version of Dataverse"); + if (PidUtil.getPidProvider(protocol, authority, shoulder) != null) { + logger.warning( + "Legacy PID provider settings found - ignored since a provider for the same protocol, authority, shoulder has been registered"); + } else { + PidProvider legacy = null; + // Try to add a legacy provider + String identifierGenerationStyle = settingsService + .getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "random"); + String dataFilePidFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, + "DEPENDENT"); + switch (protocol) { + case "doi": + switch (provider) { + case "EZID": + + String baseUrl = JvmSettings.LEGACY_EZID_API_URL.lookup(); + String username = JvmSettings.LEGACY_EZID_USERNAME.lookup(); + String password = JvmSettings.LEGACY_EZID_PASSWORD.lookup(); + PidUtil.addToProviderList(new EZIdDOIProvider("legacy", "legacy", authority, shoulder, + identifierGenerationStyle, dataFilePidFormat, "", "", baseUrl, username, password)); + + break; + case "DataCite": + String mdsUrl = JvmSettings.LEGACY_DATACITE_MDS_API_URL.lookup(); + String restUrl = JvmSettings.LEGACY_DATACITE_REST_API_URL.lookup(); + // Defaults for testing where no account is set up + String dcUsername = JvmSettings.LEGACY_DATACITE_USERNAME.lookup(); + String dcPassword = JvmSettings.LEGACY_DATACITE_PASSWORD.lookup(); + if (mdsUrl != null && restUrl != null && dcUsername != null && dcPassword != null) { + legacy = new DataCiteDOIProvider("legacy", "legacy", authority, shoulder, + identifierGenerationStyle, dataFilePidFormat, "", "", mdsUrl, restUrl, dcUsername, + dcPassword); + } + break; + case "FAKE": + logger.warning("Adding FAKE provider"); + legacy = new FakeDOIProvider("legacy", "legacy", authority, shoulder, identifierGenerationStyle, + dataFilePidFormat, "", ""); + break; + } + break; + case "hdl": + int index = JvmSettings.LEGACY_HANDLENET_INDEX.lookup(Integer.class); + String path = JvmSettings.LEGACY_HANDLENET_KEY_PATH.lookup(); + String passphrase = JvmSettings.LEGACY_HANDLENET_KEY_PASSPHRASE.lookup(); + boolean independentHandleService = settingsService + 
.isTrueForKey(SettingsServiceBean.Key.IndependentHandleService, false); + String handleAuthHandle = settingsService.getValueForKey(SettingsServiceBean.Key.HandleAuthHandle); + + legacy = new HandlePidProvider("legacy", "legacy", authority, shoulder, identifierGenerationStyle, + dataFilePidFormat, "", "", index, independentHandleService, handleAuthHandle, path, + passphrase); + break; + case "perma": + String baseUrl = JvmSettings.LEGACY_PERMALINK_BASEURL.lookup(); + legacy = new PermaLinkPidProvider("legacy", "legacy", authority, shoulder, + identifierGenerationStyle, dataFilePidFormat, "", "", baseUrl, + PermaLinkPidProvider.SEPARATOR); + } + if (legacy != null) { + legacy.setPidProviderServiceBean(this); + PidUtil.addToProviderList(legacy); + } + } + logger.info("Have " + PidUtil.getManagedProviderIds().size() + " managed PID providers"); + } + PidUtil.addAllToUnmanagedProviderList(Arrays.asList(new UnmanagedDOIProvider(), + new UnmanagedHandlePidProvider(), new UnmanagedPermaLinkPidProvider())); + } + + public String getProducer() { + return dataverseService.getRootDataverseName(); + } + + public boolean isGlobalIdLocallyUnique(GlobalId globalId) { + return dvObjectService.isGlobalIdLocallyUnique(globalId); + } + + String generateNewIdentifierByStoredProcedure() { + return dvObjectService.generateNewIdentifierByStoredProcedure(); + } + + public PidProvider getDefaultPidGenerator() { + Optional pidProviderDefaultId = JvmSettings.PID_DEFAULT_PROVIDER.lookupOptional(String.class); + if (pidProviderDefaultId.isPresent()) { + return PidUtil.getPidProvider(pidProviderDefaultId.get()); + } else { + String nonNullDefaultIfKeyNotFound = ""; + String protocol = settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, + nonNullDefaultIfKeyNotFound); + String authority = settingsService.getValueForKey(SettingsServiceBean.Key.Authority, + nonNullDefaultIfKeyNotFound); + String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, + nonNullDefaultIfKeyNotFound); + + return PidUtil.getPidProvider(protocol, authority, shoulder); + } + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidUtil.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidUtil.java index 78305648f67..279f18dcd0e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidUtil.java @@ -1,9 +1,8 @@ package edu.harvard.iq.dataverse.pidproviders; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.HandlenetServiceBean; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.IOException; import java.io.InputStream; @@ -14,6 +13,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.logging.Logger; import jakarta.json.Json; @@ -113,24 +113,22 @@ public static JsonObjectBuilder queryDoi(GlobalId globalId, String baseUrl, Stri * @return DOI in the form 10.7910/DVN/TJCLKP (no "doi:") */ private static String acceptOnlyDoi(GlobalId globalId) { - if (!DOIServiceBean.DOI_PROTOCOL.equals(globalId.getProtocol())) { + if (!AbstractDOIProvider.DOI_PROTOCOL.equals(globalId.getProtocol())) { throw new 
IllegalArgumentException(BundleUtil.getStringFromBundle("pids.datacite.errors.DoiOnly")); } return globalId.getAuthority() + "/" + globalId.getIdentifier(); } - static Map providerMap = new HashMap(); - static Map unmanagedProviderMap = new HashMap(); + static Map providerMap = new HashMap(); + static Map unmanagedProviderMap = new HashMap(); - public static void addAllToProviderList(List list) { - for (GlobalIdServiceBean pidProvider : list) { - providerMap.put(pidProvider.getProviderInformation().get(0), pidProvider); - } + public static void addToProviderList(PidProvider pidProvider) { + providerMap.put(pidProvider.getId(), pidProvider); } - public static void addAllToUnmanagedProviderList(List list) { - for (GlobalIdServiceBean pidProvider : list) { - unmanagedProviderMap.put(pidProvider.getProviderInformation().get(0), pidProvider); + public static void addAllToUnmanagedProviderList(List list) { + for (PidProvider pidProvider : list) { + unmanagedProviderMap.put(pidProvider.getId(), pidProvider); } } @@ -141,7 +139,7 @@ public static void addAllToUnmanagedProviderList(List list) */ public static GlobalId parseAsGlobalID(String identifier) { logger.fine("In parseAsGlobalId: " + providerMap.size()); - for (GlobalIdServiceBean pidProvider : providerMap.values()) { + for (PidProvider pidProvider : providerMap.values()) { logger.fine(" Checking " + String.join(",", pidProvider.getProviderInformation())); GlobalId globalId = pidProvider.parsePersistentId(identifier); if (globalId != null) { @@ -149,7 +147,7 @@ public static GlobalId parseAsGlobalID(String identifier) { } } // If no providers can managed this PID, at least allow it to be recognized - for (GlobalIdServiceBean pidProvider : unmanagedProviderMap.values()) { + for (PidProvider pidProvider : unmanagedProviderMap.values()) { logger.fine(" Checking " + String.join(",", pidProvider.getProviderInformation())); GlobalId globalId = pidProvider.parsePersistentId(identifier); if (globalId != null) { @@ -167,14 +165,14 @@ public static GlobalId parseAsGlobalID(String identifier) { public static GlobalId parseAsGlobalID(String protocol, String authority, String identifier) { logger.fine("Looking for " + protocol + " " + authority + " " + identifier); logger.fine("In parseAsGlobalId: " + providerMap.size()); - for (GlobalIdServiceBean pidProvider : providerMap.values()) { + for (PidProvider pidProvider : providerMap.values()) { logger.fine(" Checking " + String.join(",", pidProvider.getProviderInformation())); GlobalId globalId = pidProvider.parsePersistentId(protocol, authority, identifier); if (globalId != null) { return globalId; } } - for (GlobalIdServiceBean pidProvider : unmanagedProviderMap.values()) { + for (PidProvider pidProvider : unmanagedProviderMap.values()) { logger.fine(" Checking " + String.join(",", pidProvider.getProviderInformation())); GlobalId globalId = pidProvider.parsePersistentId(protocol, authority, identifier); if (globalId != null) { @@ -191,28 +189,96 @@ public static GlobalId parseAsGlobalID(String protocol, String authority, String * This method should be deprecated/removed when further refactoring to support * multiple PID providers is done. At that point, when the providers aren't * beans, this code can be moved into other classes that go in the providerMap. - * If this method is not kept in sync with the DOIServiceBean and - * HandlenetServiceBean implementations, the tests using it won't be valid tests - * of the production code. 
+ * If this method is not kept in sync with the AbstractDOIProvider and HandlePidProvider + * implementations, the tests using it won't be valid tests of the production + * code. */ private static GlobalId parseUnmanagedDoiOrHandle(String protocol, String authority, String identifier) { // Default recognition - could be moved to new classes in the future. - if (!GlobalIdServiceBean.isValidGlobalId(protocol, authority, identifier)) { + if (!PidProvider.isValidGlobalId(protocol, authority, identifier)) { return null; } String urlPrefix = null; switch (protocol) { - case DOIServiceBean.DOI_PROTOCOL: - if (!GlobalIdServiceBean.checkDOIAuthority(authority)) { + case AbstractDOIProvider.DOI_PROTOCOL: + if (!PidProvider.checkDOIAuthority(authority)) { return null; } - urlPrefix = DOIServiceBean.DOI_RESOLVER_URL; + urlPrefix = AbstractDOIProvider.DOI_RESOLVER_URL; break; - case HandlenetServiceBean.HDL_PROTOCOL: - urlPrefix = HandlenetServiceBean.HDL_RESOLVER_URL; + case HandlePidProvider.HDL_PROTOCOL: + urlPrefix = HandlePidProvider.HDL_RESOLVER_URL; break; } return new GlobalId(protocol, authority, identifier, "/", urlPrefix, null); } + + /** + * Get a PidProvider by name. GlobalIds have a getProviderId() method, so this + * method is often used as + * getPidProvider(dvObject.getGlobalId().getProviderId()) (which will fail if + * the GlobalId is null - use PidProviderFactoryBean.getPidProvider(DvObject) if + * you aren't sure). + * + */ + + public static PidProvider getPidProvider(String name) { + for (PidProvider pidProvider : providerMap.values()) { + if (name.equals(pidProvider.getId())) { + return pidProvider; + } + } + for (PidProvider pidProvider : unmanagedProviderMap.values()) { + if (name.equals(pidProvider.getId())) { + return pidProvider; + } + } + return null; + } + + + + /** + * Method to clear all managed/unmanaged PidProviders. Only for testing, as these + * lists are only loaded once by the @Singleton PidProviderFactoryBean in Dataverse. + */ + public static void clearPidProviders() { + providerMap.clear(); + unmanagedProviderMap.clear(); + } + + /** + * Get a PidProvider by protocol/authority/shoulder. 
+ */ + public static PidProvider getPidProvider(String protocol, String authority, String shoulder) { + return getPidProvider(protocol, authority, shoulder, AbstractPidProvider.SEPARATOR); + } + + public static PidProvider getPidProvider(String protocol, String authority, String shoulder, String separator) { + for (PidProvider pidProvider : providerMap.values()) { + if (protocol.equals(pidProvider.getProtocol()) && authority.equals(pidProvider.getAuthority()) + && shoulder.equals(pidProvider.getShoulder()) && separator.equals(pidProvider.getSeparator())) { + return pidProvider; + } + } + for (PidProvider pidProvider : unmanagedProviderMap.values()) { + if (protocol.equals(pidProvider.getProtocol())) { + return pidProvider; + } + } + return null; + } + + public static Set getManagedProviderIds() { + return providerMap.keySet(); + } + + public static JsonObject getProviders() { + JsonObjectBuilder builder = Json.createObjectBuilder(); + for (PidProvider pidProvider : providerMap.values()) { + builder.add(pidProvider.getId(), pidProvider.getProviderSpecification()); + } + return builder.build(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java new file mode 100644 index 00000000000..43e34e74c59 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java @@ -0,0 +1,123 @@ +package edu.harvard.iq.dataverse.pidproviders.doi; + +import java.util.Arrays; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; + + +public abstract class AbstractDOIProvider extends AbstractPidProvider { + + private static final Logger logger = Logger.getLogger(AbstractDOIProvider.class.getCanonicalName()); + + public static final String DOI_PROTOCOL = "doi"; + public static final String DOI_RESOLVER_URL = "https://doi.org/"; + public static final String HTTP_DOI_RESOLVER_URL = "http://doi.org/"; + public static final String DXDOI_RESOLVER_URL = "https://dx.doi.org/"; + public static final String HTTP_DXDOI_RESOLVER_URL = "http://dx.doi.org/"; + + public AbstractDOIProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, String datafilePidFormat, String managedList, String excludedList) { + super(id, label, DOI_PROTOCOL, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList); + } + + //For Unmanged provider + public AbstractDOIProvider(String name, String label) { + super(name, label, DOI_PROTOCOL); + } + + @Override + public GlobalId parsePersistentId(String pidString) { + if (pidString.startsWith(DOI_RESOLVER_URL)) { + pidString = pidString.replace(DOI_RESOLVER_URL, + (DOI_PROTOCOL + ":")); + } else if (pidString.startsWith(HTTP_DOI_RESOLVER_URL)) { + pidString = pidString.replace(HTTP_DOI_RESOLVER_URL, + (DOI_PROTOCOL + ":")); + } else if (pidString.startsWith(DXDOI_RESOLVER_URL)) { + pidString = pidString.replace(DXDOI_RESOLVER_URL, + (DOI_PROTOCOL + ":")); + } + return super.parsePersistentId(pidString); + } + + @Override + public GlobalId 
parsePersistentId(String protocol, String identifierString) { + + if (!DOI_PROTOCOL.equals(protocol)) { + return null; + } + GlobalId globalId = super.parsePersistentId(protocol, identifierString); + if (globalId!=null && !PidProvider.checkDOIAuthority(globalId.getAuthority())) { + return null; + } + return globalId; + } + + @Override + public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + + if (!DOI_PROTOCOL.equals(protocol)) { + return null; + } + return super.parsePersistentId(protocol, authority, identifier); + } + + public String getUrlPrefix() { + return DOI_RESOLVER_URL; + } + + protected String getProviderKeyName() { + return null; + } + + public String getProtocol() { + return DOI_PROTOCOL; + } + + public String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { + + Dataset dataset = null; + + if (dvObject instanceof Dataset) { + dataset = (Dataset) dvObject; + } else { + dataset = (Dataset) dvObject.getOwner(); + } + + XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); + metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); + if (dvObject.isInstanceofDataset()) { + metadataTemplate.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); + } + if (dvObject.isInstanceofDataFile()) { + DataFile df = (DataFile) dvObject; + String fileDescription = df.getDescription(); + metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription); + } + + metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); + metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); + metadataTemplate.setTitle(dvObject.getCurrentName()); + String producerString = pidProviderService.getProducer(); + if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { + producerString = UNAVAILABLE; + } + metadataTemplate.setPublisher(producerString); + metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + + String xmlMetadata = metadataTemplate.generateXML(dvObject); + logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); + return xmlMetadata; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedDOIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/UnmanagedDOIProvider.java similarity index 73% rename from src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedDOIServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/UnmanagedDOIProvider.java index f7e9372cc9b..d4e674f8396 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedDOIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/UnmanagedDOIProvider.java @@ -1,17 +1,11 @@ -package edu.harvard.iq.dataverse.pidproviders; +package edu.harvard.iq.dataverse.pidproviders.doi; import java.io.IOException; import java.util.List; import java.util.Map; -import java.util.logging.Logger; - -import jakarta.annotation.PostConstruct; -import jakarta.ejb.Stateless; - import org.apache.commons.httpclient.HttpException; import org.apache.commons.lang3.NotImplementedException; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; @@ -20,15 
+14,13 @@ * */ -@Stateless -public class UnmanagedDOIServiceBean extends DOIServiceBean { +public class UnmanagedDOIProvider extends AbstractDOIProvider { - private static final Logger logger = Logger.getLogger(UnmanagedDOIServiceBean.class.getCanonicalName()); + public static final String ID = "UnmanagedDOIProvider"; - @PostConstruct - private void init() { - // Always on - configured = true; + public UnmanagedDOIProvider() { + //Also using ID as label + super(ID, ID); } @Override @@ -73,11 +65,15 @@ public boolean publicizeIdentifier(DvObject dvObject) { @Override public List getProviderInformation() { - return List.of("UnmanagedDOIProvider", ""); + return List.of(getId(), ""); } + @Override + public String getProviderType() { + return "unamagedDOI"; + } // PID recognition - // Done by DOIServiceBean + // Done by AbstractDOIProvider } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java new file mode 100644 index 00000000000..30e4dfd79cc --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -0,0 +1,314 @@ +package edu.harvard.iq.dataverse.pidproviders.doi; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetAuthor; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; + +public class XmlMetadataTemplate { + + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.DataCiteMetadataTemplate"); + private static String template; + + static { + try (InputStream in = XmlMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) { + template = new String(in.readAllBytes(), StandardCharsets.UTF_8); + } catch (Exception e) { + logger.log(Level.SEVERE, "datacite metadata template load error"); + logger.log(Level.SEVERE, "String " + e.toString()); + logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); + logger.log(Level.SEVERE, "cause " + e.getCause()); + logger.log(Level.SEVERE, "message " + e.getMessage()); + } + } + + private String xmlMetadata; + private String identifier; + private List datafileIdentifiers; + private List creators; + private String title; + private String publisher; + private String publisherYear; + private List authors; + private String description; + private List contacts; + private List producers; + + public List getProducers() { + return producers; + } + + public void setProducers(List producers) { + this.producers = producers; + } + + public List getContacts() { + return contacts; + } + + public void setContacts(List contacts) { + this.contacts = contacts; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } + + public XmlMetadataTemplate() { + } + + public List getDatafileIdentifiers() { + return datafileIdentifiers; + } + + public void setDatafileIdentifiers(List 
datafileIdentifiers) { + this.datafileIdentifiers = datafileIdentifiers; + } + + public XmlMetadataTemplate(String xmlMetaData) { + this.xmlMetadata = xmlMetaData; + Document doc = Jsoup.parseBodyFragment(xmlMetaData); + Elements identifierElements = doc.select("identifier"); + if (identifierElements.size() > 0) { + identifier = identifierElements.get(0).html(); + } + Elements creatorElements = doc.select("creatorName"); + creators = new ArrayList<>(); + for (Element creatorElement : creatorElements) { + creators.add(creatorElement.html()); + } + Elements titleElements = doc.select("title"); + if (titleElements.size() > 0) { + title = titleElements.get(0).html(); + } + Elements publisherElements = doc.select("publisher"); + if (publisherElements.size() > 0) { + publisher = publisherElements.get(0).html(); + } + Elements publisherYearElements = doc.select("publicationYear"); + if (publisherYearElements.size() > 0) { + publisherYear = publisherYearElements.get(0).html(); + } + } + + public String generateXML(DvObject dvObject) { + // Can't use "UNKNOWN" here because DataCite will respond with "[facet + // 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" + String publisherYearFinal = "9999"; + // FIXME: Investigate why this.publisherYear is sometimes null now that pull + // request #4606 has been merged. + if (this.publisherYear != null) { + // Added to prevent a NullPointerException when trying to destroy datasets when + // using DataCite rather than EZID. + publisherYearFinal = this.publisherYear; + } + xmlMetadata = template.replace("${identifier}", getIdentifier().trim()).replace("${title}", this.title) + .replace("${publisher}", this.publisher).replace("${publisherYear}", publisherYearFinal) + .replace("${description}", this.description); + + StringBuilder creatorsElement = new StringBuilder(); + if (authors != null && !authors.isEmpty()) { + for (DatasetAuthor author : authors) { + creatorsElement.append(""); + creatorsElement.append(author.getName().getDisplayValue()); + creatorsElement.append(""); + + if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() + && !author.getIdValue().isEmpty() && author.getAffiliation() != null + && !author.getAffiliation().getDisplayValue().isEmpty()) { + + if (author.getIdType().equals("ORCID")) { + creatorsElement.append( + "" + + author.getIdValue() + ""); + } + if (author.getIdType().equals("ISNI")) { + creatorsElement.append( + "" + + author.getIdValue() + ""); + } + if (author.getIdType().equals("LCNA")) { + creatorsElement.append( + "" + + author.getIdValue() + ""); + } + } + if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { + creatorsElement + .append("" + author.getAffiliation().getDisplayValue() + ""); + } + creatorsElement.append(""); + } + + } else { + creatorsElement.append("").append(AbstractPidProvider.UNAVAILABLE) + .append(""); + } + + xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString()); + + StringBuilder contributorsElement = new StringBuilder(); + if (this.getContacts() != null) { + for (String[] contact : this.getContacts()) { + if (!contact[0].isEmpty()) { + contributorsElement.append("" + + contact[0] + ""); + if (!contact[1].isEmpty()) { + contributorsElement.append("" + contact[1] + ""); + } + contributorsElement.append(""); + } + } + } + + if (this.getProducers() != null) { + for (String[] producer : this.getProducers()) { + contributorsElement.append("" + producer[0] + + ""); + if 
(!producer[1].isEmpty()) { + contributorsElement.append("" + producer[1] + ""); + } + contributorsElement.append(""); + } + } + + String relIdentifiers = generateRelatedIdentifiers(dvObject); + + xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); + + xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); + return xmlMetadata; + } + + private String generateRelatedIdentifiers(DvObject dvObject) { + + StringBuilder sb = new StringBuilder(); + if (dvObject.isInstanceofDataset()) { + Dataset dataset = (Dataset) dvObject; + if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + + datafileIdentifiers = new ArrayList<>(); + for (DataFile dataFile : dataset.getFiles()) { + if (dataFile.getGlobalId() != null) { + if (sb.toString().isEmpty()) { + sb.append(""); + } + sb.append("" + + dataFile.getGlobalId() + ""); + } + } + + if (!sb.toString().isEmpty()) { + sb.append(""); + } + } + } else if (dvObject.isInstanceofDataFile()) { + DataFile df = (DataFile) dvObject; + sb.append(""); + sb.append("" + + df.getOwner().getGlobalId() + ""); + sb.append(""); + } + return sb.toString(); + } + + public void generateFileIdentifiers(DvObject dvObject) { + + if (dvObject.isInstanceofDataset()) { + Dataset dataset = (Dataset) dvObject; + + if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + + datafileIdentifiers = new ArrayList<>(); + for (DataFile dataFile : dataset.getFiles()) { + datafileIdentifiers.add(dataFile.getIdentifier()); + int x = xmlMetadata.indexOf("") - 1; + xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier()); + xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" + + template.substring(x, template.length() - 1); + + } + + } else { + xmlMetadata = xmlMetadata.replace( + "${relatedIdentifier}", + ""); + } + } + } + + public static String getTemplate() { + return template; + } + + public static void setTemplate(String template) { + XmlMetadataTemplate.template = template; + } + + public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public List getCreators() { + return creators; + } + + public void setCreators(List creators) { + this.creators = creators; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getPublisher() { + return publisher; + } + + public void setPublisher(String publisher) { + this.publisher = publisher; + } + + public String getPublisherYear() { + return publisherYear; + } + + public void setPublisherYear(String publisherYear) { + this.publisherYear = publisherYear; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java new file mode 100644 index 00000000000..0e322eace05 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java @@ -0,0 +1,222 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package edu.harvard.iq.dataverse.pidproviders.doi.datacite; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.text.StringEscapeUtils; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; + +/** + * + * @author luopc + */ +public class DOIDataCiteRegisterService { + + private static final Logger logger = Logger.getLogger(DOIDataCiteRegisterService.class.getCanonicalName()); + + + //A singleton since it, and the httpClient in it can be reused. + private DataCiteRESTfullClient client=null; + + public DOIDataCiteRegisterService(String url, String username, String password) { + client = new DataCiteRESTfullClient(url, username, password); + } + + /** + * This "reserveIdentifier" method is heavily based on the + * "registerIdentifier" method below but, unlike that one, does not + * register a URL; registering a URL is what causes the "state" of a DOI to + * transition from "draft" to "findable". Here are some DataCite docs on the matter: + * + * "DOIs can exist in three states: draft, registered, and findable. DOIs + * are in the draft state when metadata have been registered, and will + * transition to the findable state when registering a URL." -- + * https://support.datacite.org/docs/mds-api-guide#doi-states + */ + public String reserveIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { + String retString = ""; + String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); + + retString = client.postMetadata(xmlMetadata); + + return retString; + } + + public String registerIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { + String retString = ""; + String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); + String target = metadata.get("_target"); + + retString = client.postMetadata(xmlMetadata); + client.postUrl(identifier.substring(identifier.indexOf(":") + 1), target); + + return retString; + } + + public String deactivateIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { + String retString = ""; + + String metadataString = getMetadataForDeactivateIdentifier(identifier, metadata, dvObject); + retString = client.postMetadata(metadataString); + retString = client.inactiveDataset(identifier.substring(identifier.indexOf(":") + 1)); + + return retString; + } + + public static String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { + + Dataset dataset = null; + + if (dvObject instanceof Dataset) { + dataset = (Dataset) dvObject; + } else { + dataset = (Dataset) dvObject.getOwner(); + } + + XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); + metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); + if (dvObject.isInstanceofDataset()) { + //While getDescriptionPlainText strips < and > from HTML, it leaves '&' (at least so we
need to xml escape as well + String description = StringEscapeUtils.escapeXml10(dataset.getLatestVersion().getDescriptionPlainText()); + if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) { + description = AbstractPidProvider.UNAVAILABLE; + } + metadataTemplate.setDescription(description); + } + if (dvObject.isInstanceofDataFile()) { + DataFile df = (DataFile) dvObject; + //Note: File metadata is not escaped like dataset metadata is, so adding an xml escape here. + //This could/should be removed if the datafile methods add escaping + String fileDescription = StringEscapeUtils.escapeXml10(df.getDescription()); + metadataTemplate.setDescription(fileDescription == null ? AbstractPidProvider.UNAVAILABLE : fileDescription); + } + + metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); + metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); + String title = dvObject.getCurrentName(); + if(dvObject.isInstanceofDataFile()) { + //Note file title is not currently escaped the way the dataset title is, so adding it here. + title = StringEscapeUtils.escapeXml10(title); + } + + if (title.isEmpty() || title.equals(DatasetField.NA_VALUE)) { + title = AbstractPidProvider.UNAVAILABLE; + } + + metadataTemplate.setTitle(title); + String producerString = BrandingUtil.getRootDataverseCollectionName(); + if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { + producerString = AbstractPidProvider.UNAVAILABLE; + } + metadataTemplate.setPublisher(producerString); + metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + + String xmlMetadata = metadataTemplate.generateXML(dvObject); + logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); + return xmlMetadata; + } + + public static String getMetadataForDeactivateIdentifier(String identifier, Map metadata, DvObject dvObject) { + + XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); + metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + + metadataTemplate.setDescription(AbstractPidProvider.UNAVAILABLE); + + String title =metadata.get("datacite.title"); + + System.out.print("Map metadata title: "+ metadata.get("datacite.title")); + + metadataTemplate.setAuthors(null); + + metadataTemplate.setTitle(title); + String producerString = AbstractPidProvider.UNAVAILABLE; + + metadataTemplate.setPublisher(producerString); + metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + + String xmlMetadata = metadataTemplate.generateXML(dvObject); + logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); + return xmlMetadata; + } + + public String modifyIdentifier(String identifier, Map metadata, DvObject dvObject) + throws IOException { + + String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); + + logger.fine("XML to send to DataCite: " + xmlMetadata); + + String status = metadata.get("_status").trim(); + String target = metadata.get("_target"); + String retString = ""; + switch (status) { + case DataCiteDOIProvider.DRAFT: + // draft DOIs aren't currently being updated after every edit - ToDo - should + // this be changed or made optional? 
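Taken together, reserveIdentifier, registerIdentifier and deactivateIdentifier map onto the draft/findable/registered lifecycle quoted in the javadoc above. A minimal sketch of the call sequence, assuming a prepared Dataset and caller-supplied metadata; the endpoint, credentials, identifier and URLs are placeholders, and the real wiring in Dataverse goes through DataCiteDOIProvider rather than direct calls like these:

import java.util.HashMap;
import java.util.Map;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DOIDataCiteRegisterService;

class DoiLifecycleSketch {
    static void walkLifecycle(Dataset dataset) throws Exception {
        // Placeholder MDS endpoint and credentials; real values come from provider configuration.
        DOIDataCiteRegisterService svc =
                new DOIDataCiteRegisterService("https://mds.test.datacite.org", "user", "secret");

        String doi = "doi:10.5072/FK2/EXAMPLE";               // placeholder identifier
        Map<String, String> meta = new HashMap<>();
        meta.put("datacite.creator", "Admin, Dataverse");
        meta.put("datacite.publicationyear", "2024");

        svc.reserveIdentifier(doi, meta, dataset);            // metadata only -> DOI stays "draft"

        meta.put("_target", "https://demo.example.org/dataset.xhtml?persistentId=" + doi);
        svc.registerIdentifier(doi, meta, dataset);           // metadata + URL -> DOI becomes "findable"

        svc.deactivateIdentifier(doi, meta, dataset);         // metadata hidden -> DOI becomes "registered"
    }
}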
+ retString = "success to reserved " + identifier; + break; + case DataCiteDOIProvider.FINDABLE: + try { + retString = client.postMetadata(xmlMetadata); + client.postUrl(identifier.substring(identifier.indexOf(":") + 1), target); + } catch (UnsupportedEncodingException ex) { + logger.log(Level.SEVERE, null, ex); + } catch (RuntimeException rte) { + logger.log(Level.SEVERE, "Error creating DOI at DataCite: {0}", rte.getMessage()); + logger.log(Level.SEVERE, "Exception", rte); + } + break; + case DataCiteDOIProvider.REGISTERED: + retString = client.inactiveDataset(identifier.substring(identifier.indexOf(":") + 1)); + break; + } + return retString; + } + + public boolean testDOIExists(String identifier) { + boolean doiExists; + try { + doiExists = client.testDOIExists(identifier.substring(identifier.indexOf(":") + 1)); + } catch (Exception e) { + logger.log(Level.INFO, identifier, e); + return false; + } + return doiExists; + } + + Map getMetadata(String identifier) throws IOException { + Map metadata = new HashMap<>(); + try { + String xmlMetadata = client.getMetadata(identifier.substring(identifier.indexOf(":") + 1)); + XmlMetadataTemplate template = new XmlMetadataTemplate(xmlMetadata); + metadata.put("datacite.creator", String.join("; ", template.getCreators())); + metadata.put("datacite.title", template.getTitle()); + metadata.put("datacite.publisher", template.getPublisher()); + metadata.put("datacite.publicationyear", template.getPublisherYear()); + } catch (RuntimeException e) { + logger.log(Level.INFO, identifier, e); + } + return metadata; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java new file mode 100644 index 00000000000..23078e2719b --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java @@ -0,0 +1,319 @@ +package edu.harvard.iq.dataverse.pidproviders.doi.datacite; + +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.Base64; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.HttpStatus; + +/** + * + * @author luopc + */ +public class DataCiteDOIProvider extends AbstractDOIProvider { + + private static final Logger logger = Logger.getLogger(DataCiteDOIProvider.class.getCanonicalName()); + + static final String FINDABLE = "findable"; //public - published dataset versions + static final String DRAFT = "draft"; //reserved but not findable yet - draft/unpublished datasets + static final String REGISTERED = "registered"; //was findable once, not anymore - deaccessioned datasets + static final String NONE = "none"; //no record - draft/unpublished datasets where the initial request to reserve has failed + + public static final String TYPE = "datacite"; + + + private String mdsUrl; + private String apiUrl; + private String username; + private String password; + + private DOIDataCiteRegisterService 
doiDataCiteRegisterService; + + public DataCiteDOIProvider(String id, String label, String providerAuthority, String providerShoulder, + String identifierGenerationStyle, String datafilePidFormat, String managedList, String excludedList, + String mdsUrl, String apiUrl, String username, String password) { + super(id, label, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, + excludedList); + this.mdsUrl = mdsUrl; + this.apiUrl = apiUrl; + this.username = username; + this.password = password; + doiDataCiteRegisterService = new DOIDataCiteRegisterService(mdsUrl, username, password); + } + + @Override + public boolean registerWhenPublished() { + return false; + } + + @Override + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) { + logger.log(Level.FINE, "alreadyRegistered"); + if (pid == null || pid.asString().isEmpty()) { + logger.fine("No identifier sent."); + return false; + } + boolean alreadyRegistered; + String identifier = pid.asString(); + try { + alreadyRegistered = doiDataCiteRegisterService.testDOIExists(identifier); + } catch (Exception e) { + logger.log(Level.WARNING, "alreadyRegistered failed"); + return false; + } + return alreadyRegistered; + } + + @Override + public String createIdentifier(DvObject dvObject) throws Exception { + logger.log(Level.FINE, "createIdentifier"); + if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()) { + dvObject = generatePid(dvObject); + } + String identifier = getIdentifier(dvObject); + Map metadata = getMetadataForCreateIndicator(dvObject); + metadata.put("_status", DRAFT); + try { + String retString = doiDataCiteRegisterService.reserveIdentifier(identifier, metadata, dvObject); + logger.log(Level.FINE, "create DOI identifier retString : " + retString); + return retString; + } catch (Exception e) { + logger.log(Level.WARNING, "Identifier not created: create failed", e); + throw e; + } + } + + @Override + public Map getIdentifierMetadata(DvObject dvObject) { + logger.log(Level.FINE, "getIdentifierMetadata"); + String identifier = getIdentifier(dvObject); + Map metadata = new HashMap<>(); + try { + metadata = doiDataCiteRegisterService.getMetadata(identifier); + metadata.put("_status", getPidStatus(dvObject)); + } catch (Exception e) { + logger.log(Level.WARNING, "getIdentifierMetadata failed", e); + } + return metadata; + } + + /** + * Modifies the DOI metadata for a Dataset + * + * @param dvObject the dvObject whose metadata needs to be modified + * @return the Dataset identifier, or null if the modification failed + * @throws java.lang.Exception + */ + @Override + public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { + logger.log(Level.FINE, "modifyIdentifier"); + String identifier = getIdentifier(dvObject); + try { + Map metadata = getIdentifierMetadata(dvObject); + doiDataCiteRegisterService.modifyIdentifier(identifier, metadata, dvObject); + } catch (Exception e) { + logger.log(Level.WARNING, "modifyMetadata failed", e); + throw e; + } + return identifier; + } + + /* + * Deletes a DOI if it is in DRAFT/DRAFT state or removes metadata and + * changes it from PUBLIC/FINDABLE to REGISTERED. 
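The comment above, and the deleteIdentifier logic that follows, are driven by the four status constants declared at the top of this class. A rough paraphrase of how getPidStatus, defined further down, derives that status for a dataset; this is a sketch only, it assumes the Dataverse domain classes, and the real method below is authoritative:

import java.util.List;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;

class PidStatusSketch {
    static String statusOf(Dataset ds) {
        List<DatasetVersion> versions = ds.getVersions();
        if (versions.stream().anyMatch(DatasetVersion::isReleased)) {
            return "findable";      // at least one published version exists
        }
        boolean reserved = ds.isIdentifierRegistered();
        if (reserved && versions.stream().anyMatch(DatasetVersion::isDeaccessioned)) {
            return "registered";    // was findable once, every published version deaccessioned
        }
        if (reserved && versions.stream().anyMatch(DatasetVersion::isDraft)) {
            return "draft";         // reserved at DataCite but never published
        }
        return "none";              // nothing reserved yet (or the reservation failed)
    }
}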
+ */ + @Override + public void deleteIdentifier(DvObject dvObject) throws IOException, HttpException { + logger.log(Level.FINE, "deleteIdentifier"); + String identifier = getIdentifier(dvObject); + String idStatus = getPidStatus(dvObject); + switch (idStatus) { + case DRAFT: + logger.log(Level.FINE, "Delete status is reserved.."); + deleteDraftIdentifier(dvObject); + break; + case FINDABLE: + // if public then it has been released set to REGISTERED/unavailable and reset + // target to n2t url + Map metadata = addDOIMetadataForDestroyedDataset(dvObject); + metadata.put("_status", "registered"); + metadata.put("_target", getTargetUrl(dvObject)); + doiDataCiteRegisterService.deactivateIdentifier(identifier, metadata, dvObject); + break; + + case REGISTERED: + case NONE: + // Nothing to do + } + } + + /** + * Deletes DOI from the DataCite side, if possible. Only "draft" DOIs can be + * deleted. + */ + private void deleteDraftIdentifier(DvObject dvObject) throws IOException { + + GlobalId doi = dvObject.getGlobalId(); + /** + * Deletes the DOI from DataCite if it can. Returns 204 if PID was deleted (only + * possible for "draft" DOIs), 405 (method not allowed) if the DOI wasn't + * deleted (because it's in "findable" state, for example, 404 if the DOI wasn't + * found, and possibly other status codes such as 500 if DataCite is down. + */ + + URL url = new URL(getApiUrl() + "/dois/" + doi.getAuthority() + "/" + doi.getIdentifier()); + HttpURLConnection connection = null; + connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("DELETE"); + String userpass = getUsername() + ":" + getPassword(); + String basicAuth = "Basic " + new String(Base64.getEncoder().encode(userpass.getBytes())); + connection.setRequestProperty("Authorization", basicAuth); + int status = connection.getResponseCode(); + if (status != HttpStatus.SC_NO_CONTENT) { + logger.warning( + "Incorrect Response Status from DataCite: " + status + " : " + connection.getResponseMessage()); + throw new HttpException("Status: " + status); + } + logger.fine("deleteDoi status for " + doi.asString() + ": " + status); + } + + @Override + public boolean publicizeIdentifier(DvObject dvObject) { + logger.log(Level.FINE, "updateIdentifierStatus"); + if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()) { + dvObject = generatePid(dvObject); + } + String identifier = getIdentifier(dvObject); + Map metadata = getUpdateMetadata(dvObject); + metadata.put("_status", FINDABLE); + metadata.put("datacite.publicationyear", generateYear(dvObject)); + metadata.put("_target", getTargetUrl(dvObject)); + try { + doiDataCiteRegisterService.registerIdentifier(identifier, metadata, dvObject); + return true; + } catch (Exception e) { + logger.log(Level.WARNING, "modifyMetadata failed: " + e.getMessage(), e); + return false; + } + } + + @Override + public List getProviderInformation() { + return List.of(getId(), "https://status.datacite.org"); + } + + @Override + protected String getProviderKeyName() { + return "DataCite"; + } + + @Override + public String getProviderType() { + // TODO Auto-generated method stub + return null; + } + + public String getMdsUrl() { + return mdsUrl; + } + + public String getApiUrl() { + return apiUrl; + } + + public String getUsername() { + return username; + } + + public String getPassword() { + return password; + } + + /** + * Method to determine the status of a dvObject's PID. It replaces keeping a + * separate DOIDataCiteRegisterCache. 
We could also try to get this info from + * DataCite directly, but it appears to not be in the xml metadata return, so it + * would require another/different api call (possible ToDo). + * + * @param dvObject - Dataset or DataFile + * @return PID status - NONE, DRAFT, FINDABLE, or REGISTERED + */ + String getPidStatus(DvObject dvObject) { + String status = NONE; + if (dvObject instanceof Dataset) { + Dataset dataset = (Dataset) dvObject; + // return true, if all published versions were deaccessioned + boolean hasDeaccessionedVersions = false; + for (DatasetVersion testDsv : dataset.getVersions()) { + if (testDsv.isReleased()) { + // With any released version, we're done + return FINDABLE; + } + // Also check for draft version + if (testDsv.isDraft()) { + if (dataset.isIdentifierRegistered()) { + status = DRAFT; + // Keep iterating to see if there's a released version + } + } + if (testDsv.isDeaccessioned()) { + hasDeaccessionedVersions = true; + // Keep iterating to see if there's a released version + } + } + if (hasDeaccessionedVersions) { + if (dataset.isIdentifierRegistered()) { + return REGISTERED; + } + } + return status; + } else if (dvObject instanceof DataFile) { + DataFile df = (DataFile) dvObject; + // return true, if all published versions were deaccessioned + boolean isInDeaccessionedVersions = false; + for (FileMetadata fmd : df.getFileMetadatas()) { + DatasetVersion testDsv = fmd.getDatasetVersion(); + if (testDsv.isReleased()) { + // With any released version, we're done + return FINDABLE; + } + // Also check for draft version + if (testDsv.isDraft()) { + if (df.isIdentifierRegistered()) { + status = DRAFT; + // Keep iterating to see if there's a released/deaccessioned version + } + } + if (testDsv.isDeaccessioned()) { + isInDeaccessionedVersions = true; + // Keep iterating to see if there's a released version + } + } + if (isInDeaccessionedVersions) { + if (df.isIdentifierRegistered()) { + return REGISTERED; + } + } + + } + return status; + } + + + + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteProviderFactory.java new file mode 100644 index 00000000000..99d13b2647c --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteProviderFactory.java @@ -0,0 +1,43 @@ +package edu.harvard.iq.dataverse.pidproviders.doi.datacite; + +import com.google.auto.service.AutoService; + +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +@AutoService(PidProviderFactory.class) +public class DataCiteProviderFactory implements PidProviderFactory { + + @Override + public PidProvider createPidProvider(String providerId) { + String providerType = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + if (!providerType.equals(DataCiteDOIProvider.TYPE)) { + // Being asked to create a non-DataCite provider + return null; + } + String providerLabel = JvmSettings.PID_PROVIDER_LABEL.lookup(providerId); + String providerAuthority = JvmSettings.PID_PROVIDER_AUTHORITY.lookup(providerId); + String providerShoulder = JvmSettings.PID_PROVIDER_SHOULDER.lookupOptional(providerId).orElse(""); + String identifierGenerationStyle = JvmSettings.PID_PROVIDER_IDENTIFIER_GENERATION_STYLE + .lookupOptional(providerId).orElse("randomString"); + String datafilePidFormat =
JvmSettings.PID_PROVIDER_DATAFILE_PID_FORMAT.lookupOptional(providerId) + .orElse(SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); + String managedList = JvmSettings.PID_PROVIDER_MANAGED_LIST.lookupOptional(providerId).orElse(""); + String excludedList = JvmSettings.PID_PROVIDER_EXCLUDED_LIST.lookupOptional(providerId).orElse(""); + + String mdsUrl = JvmSettings.DATACITE_MDS_API_URL.lookupOptional(providerId).orElse("https://mds.test.datacite.org"); + String apiUrl = JvmSettings.DATACITE_REST_API_URL.lookupOptional(providerId).orElse("https://api.test.datacite.org"); + String username = JvmSettings.DATACITE_USERNAME.lookup(providerId); + String password = JvmSettings.DATACITE_PASSWORD.lookup(providerId); + + return new DataCiteDOIProvider(providerId, providerLabel, providerAuthority, providerShoulder, identifierGenerationStyle, + datafilePidFormat, managedList, excludedList, mdsUrl, apiUrl, username, password); + } + + public String getType() { + return DataCiteDOIProvider.TYPE; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCiteRESTfullClient.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java similarity index 92% rename from src/main/java/edu/harvard/iq/dataverse/DataCiteRESTfullClient.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java index 491f19ab36c..d185b0249b9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCiteRESTfullClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java @@ -3,7 +3,7 @@ * To change this template file, choose Tools | Templates * and open the template in the editor. */ -package edu.harvard.iq.dataverse; +package edu.harvard.iq.dataverse.pidproviders.doi.datacite; import java.io.Closeable; @@ -45,21 +45,14 @@ public class DataCiteRESTfullClient implements Closeable { private HttpClientContext context; private String encoding = "utf-8"; - public DataCiteRESTfullClient(String url, String username, String password) throws IOException { + public DataCiteRESTfullClient(String url, String username, String password) { this.url = url; - try { - context = HttpClientContext.create(); - CredentialsProvider credsProvider = new BasicCredentialsProvider(); - credsProvider.setCredentials(new AuthScope(null, -1), - new UsernamePasswordCredentials(username, password)); - context.setCredentialsProvider(credsProvider); - - httpClient = HttpClients.createDefault(); - } catch (Exception ioe) { - close(); - logger.log(Level.SEVERE,"Fail to init Client",ioe); - throw new RuntimeException("Fail to init Client", ioe); - } + context = HttpClientContext.create(); + CredentialsProvider credsProvider = new BasicCredentialsProvider(); + credsProvider.setCredentials(new AuthScope(null, -1), new UsernamePasswordCredentials(username, password)); + context.setCredentialsProvider(credsProvider); + + httpClient = HttpClients.createDefault(); } public void close() { diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdDOIProvider.java similarity index 89% rename from src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdDOIProvider.java index 86b74b72f30..fe8f1ec9c70 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdDOIProvider.java @@ -1,5 +1,10 
@@ -package edu.harvard.iq.dataverse; +package edu.harvard.iq.dataverse.pidproviders.doi.ezid; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.ucsb.nceas.ezid.EZIDException; import edu.ucsb.nceas.ezid.EZIDService; @@ -13,40 +18,40 @@ * * @author skraffmiller */ -@Stateless -public class DOIEZIdServiceBean extends DOIServiceBean { +public class EZIdDOIProvider extends AbstractDOIProvider { - private static final Logger logger = Logger.getLogger(DOIEZIdServiceBean.class.getCanonicalName()); + + + private static final Logger logger = Logger.getLogger(EZIdDOIProvider.class.getCanonicalName()); EZIDService ezidService; - // This has a sane default in microprofile-config.properties - private final String baseUrl = JvmSettings.EZID_API_URL.lookup(); + public static final String TYPE = "ezid"; + + private String baseUrl; - public DOIEZIdServiceBean() { + + public EZIdDOIProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, + String datafilePidFormat, String managedList, String excludedList, String baseUrl, String username, String password) { + super(id, label, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList); // Creating the service doesn't do any harm, just initializing some object data here. // Makes sure we don't run into NPEs from the other methods, but will obviously fail if the // login below does not work. - this.ezidService = new EZIDService(this.baseUrl); + this.baseUrl = baseUrl; + this.ezidService = new EZIDService(baseUrl); try { - // These have (obviously) no default, but still are optional to make the provider optional - String username = JvmSettings.EZID_USERNAME.lookupOptional().orElse(null); - String password = JvmSettings.EZID_PASSWORD.lookupOptional().orElse(null); - - if (username != null ^ password != null) { - logger.log(Level.WARNING, "You must give both username and password. Will not try to login."); - } - + if (username != null && password != null) { this.ezidService.login(username, password); - this.configured = true; + } else { + logger.log(Level.WARNING, "You must give both username and password. Will not try to login."); } } catch (EZIDException e) { // We only do the warnings here, but the object still needs to be created. // The EJB stateless thing expects this to go through, and it is requested on any // global id parsing. 
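Each of the new *ProviderFactory classes is annotated with @AutoService(PidProviderFactory.class), which generates the META-INF/services entry that java.util.ServiceLoader reads. The code that enumerates the factories is not part of this hunk, but the discovery pattern implied by the annotation looks roughly like this (sketch only; Dataverse's actual bootstrap may differ):

import java.util.ServiceLoader;
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory;

class ProviderDiscoverySketch {
    static PidProvider buildProvider(String providerId) {
        for (PidProviderFactory factory : ServiceLoader.load(PidProviderFactory.class)) {
            // Each factory checks the configured type for this providerId and
            // returns null when asked to build a provider it does not handle.
            PidProvider provider = factory.createPidProvider(providerId);
            if (provider != null) {
                return provider;
            }
        }
        return null;
    }
}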
- logger.log(Level.WARNING, "Login failed to {0}", this.baseUrl); + logger.log(Level.WARNING, "Login failed to {0}", baseUrl); logger.log(Level.WARNING, "Exception String: {0}", e.toString()); logger.log(Level.WARNING, "Localized message: {0}", e.getLocalizedMessage()); logger.log(Level.WARNING, "Cause:", e.getCause()); @@ -227,14 +232,14 @@ private boolean updateIdentifierStatus(DvObject dvObject, String statusIn) { @Override public List getProviderInformation(){ - return List.of("EZID", this.baseUrl); + return List.of(getId(), this.baseUrl); } @Override public String createIdentifier(DvObject dvObject) throws Throwable { logger.log(Level.FINE, "createIdentifier"); if(dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty() ){ - dvObject = generateIdentifier(dvObject); + dvObject = generatePid(dvObject); } String identifier = getIdentifier(dvObject); Map metadata = getMetadataForCreateIndicator(dvObject); @@ -271,7 +276,7 @@ public String createIdentifier(DvObject dvObject) throws Throwable { * @return A HashMap with the same values as {@code map} */ private HashMap asHashMap(Map map) { - return (map instanceof HashMap) ? (HashMap)map : new HashMap<>(map); + return (map instanceof HashMap) ? (HashMap)map : new HashMap<>(map); } @Override @@ -279,5 +284,9 @@ protected String getProviderKeyName() { return "EZID"; } + @Override + public String getProviderType() { + return TYPE; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdProviderFactory.java new file mode 100644 index 00000000000..95ad9bdeff0 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdProviderFactory.java @@ -0,0 +1,42 @@ +package edu.harvard.iq.dataverse.pidproviders.doi.ezid; + +import com.google.auto.service.AutoService; + +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +@AutoService(PidProviderFactory.class) +public class EZIdProviderFactory implements PidProviderFactory { + + @Override + public PidProvider createPidProvider(String providerId) { + String providerType = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + if (!providerType.equals(EZIdDOIProvider.TYPE)) { + // Being asked to create a non-EZId provider + return null; + } + String providerLabel = JvmSettings.PID_PROVIDER_LABEL.lookup(providerId); + String providerAuthority = JvmSettings.PID_PROVIDER_AUTHORITY.lookup(providerId); + String providerShoulder = JvmSettings.PID_PROVIDER_SHOULDER.lookupOptional(providerId).orElse(""); + String identifierGenerationStyle = JvmSettings.PID_PROVIDER_IDENTIFIER_GENERATION_STYLE + .lookupOptional(providerId).orElse("randomString"); + String datafilePidFormat = JvmSettings.PID_PROVIDER_DATAFILE_PID_FORMAT.lookupOptional(providerId) + .orElse(SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); + String managedList = JvmSettings.PID_PROVIDER_MANAGED_LIST.lookupOptional(providerId).orElse(""); + String excludedList = JvmSettings.PID_PROVIDER_EXCLUDED_LIST.lookupOptional(providerId).orElse(""); + + String baseUrl = JvmSettings.EZID_API_URL.lookupOptional(providerId).orElse("https://ezid.cdlib.org"); + String username = JvmSettings.EZID_USERNAME.lookup(providerId); + String password = JvmSettings.EZID_PASSWORD.lookup(providerId); + + return new EZIdDOIProvider(providerId, 
providerLabel, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, + managedList, excludedList, baseUrl, username, password); + } + + public String getType() { + return EZIdDOIProvider.TYPE; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeDOIProvider.java similarity index 59% rename from src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeDOIProvider.java index 3bd9d9dd022..a967fb40620 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeDOIProvider.java @@ -1,22 +1,22 @@ -package edu.harvard.iq.dataverse.pidproviders; +package edu.harvard.iq.dataverse.pidproviders.doi.fake; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.logging.Logger; -import jakarta.ejb.Stateless; +public class FakeDOIProvider extends AbstractDOIProvider { -@Stateless -public class FakePidProviderServiceBean extends DOIServiceBean { + public static final String TYPE = "FAKE"; - private static final Logger logger = Logger.getLogger(FakePidProviderServiceBean.class.getCanonicalName()); + public FakeDOIProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, + String datafilePidFormat, String managedList, String excludedList) { + super(id, label, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList); + } - //Only need to check locally public boolean isGlobalIdUnique(GlobalId globalId) { try { @@ -29,7 +29,7 @@ public boolean isGlobalIdUnique(GlobalId globalId) { @Override public boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) { - boolean existsLocally = !dvObjectService.isGlobalIdLocallyUnique(globalId); + boolean existsLocally = !pidProviderService.isGlobalIdLocallyUnique(globalId); return existsLocally ? 
existsLocally : noProviderDefault; } @@ -40,7 +40,7 @@ public boolean registerWhenPublished() { @Override public List getProviderInformation() { - return List.of("FAKE", "https://dataverse.org"); + return List.of(getId(), "https://dataverse.org"); } @Override @@ -65,7 +65,10 @@ public void deleteIdentifier(DvObject dvo) throws Exception { } @Override - public boolean publicizeIdentifier(DvObject studyIn) { + public boolean publicizeIdentifier(DvObject dvObject) { + if(dvObject.isInstanceofDataFile() && dvObject.getGlobalId()==null) { + generatePid(dvObject); + } return true; } @@ -74,4 +77,9 @@ protected String getProviderKeyName() { return "FAKE"; } + @Override + public String getProviderType() { + return TYPE; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeProviderFactory.java new file mode 100644 index 00000000000..292c39d4383 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeProviderFactory.java @@ -0,0 +1,38 @@ +package edu.harvard.iq.dataverse.pidproviders.doi.fake; + +import com.google.auto.service.AutoService; + +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +@AutoService(PidProviderFactory.class) +public class FakeProviderFactory implements PidProviderFactory { + + @Override + public PidProvider createPidProvider(String providerId) { + String providerType = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + if (!providerType.equals(FakeDOIProvider.TYPE)) { + // Being asked to create a non-EZId provider + return null; + } + String providerLabel = JvmSettings.PID_PROVIDER_LABEL.lookup(providerId); + String providerAuthority = JvmSettings.PID_PROVIDER_AUTHORITY.lookup(providerId); + String providerShoulder = JvmSettings.PID_PROVIDER_SHOULDER.lookupOptional(providerId).orElse(""); + String identifierGenerationStyle = JvmSettings.PID_PROVIDER_IDENTIFIER_GENERATION_STYLE + .lookupOptional(providerId).orElse("randomString"); + String datafilePidFormat = JvmSettings.PID_PROVIDER_DATAFILE_PID_FORMAT.lookupOptional(providerId) + .orElse(SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); + String managedList = JvmSettings.PID_PROVIDER_MANAGED_LIST.lookupOptional(providerId).orElse("");; + String excludedList = JvmSettings.PID_PROVIDER_EXCLUDED_LIST.lookupOptional(providerId).orElse("");; + + return new FakeDOIProvider(providerId, providerLabel, providerAuthority, providerShoulder, identifierGenerationStyle, + datafilePidFormat, managedList, excludedList); + } + + public String getType() { + return FakeDOIProvider.TYPE; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java similarity index 87% rename from src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java index 4942db9e7ec..2627bc76fd9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java @@ -18,10 +18,12 @@ Version 3.0. 
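FakeDOIProvider above is the no-op DOI provider (type "FAKE"), typically used for testing and demo installs where nothing should be registered externally. A minimal sketch of constructing one directly with made-up values; in practice FakeProviderFactory builds it from the JVM settings shown above:

import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeDOIProvider;

class FakeProviderSketch {
    static FakeDOIProvider demoProvider() {
        // Constructor arguments mirror the signature above; all values are illustrative.
        return new FakeDOIProvider(
                "fake1",              // provider id
                "Fake DOI Provider",  // label
                "10.5072",            // authority
                "FK2",                // shoulder
                "randomString",       // identifier generation style
                "DEPENDENT",          // datafile PID format
                "",                   // managed list
                "");                  // excluded list
    }
}

Its publicizeIdentifier simply generates a PID for files that do not have one yet and reports success, so publication never blocks on an external service.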
*/ -package edu.harvard.iq.dataverse; +package edu.harvard.iq.dataverse.pidproviders.handle; -import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; import java.io.File; import java.io.FileInputStream; @@ -29,8 +31,6 @@ import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; -import jakarta.ejb.EJB; -import jakarta.ejb.Stateless; import java.security.PrivateKey; /* Handlenet imports: */ @@ -60,23 +60,32 @@ * As of now, it only does the registration updates, to accommodate * the modifyRegistration datasets API sub-command. */ -@Stateless -public class HandlenetServiceBean extends AbstractGlobalIdServiceBean { - - @EJB - DataverseServiceBean dataverseService; - @EJB - SettingsServiceBean settingsService; - private static final Logger logger = Logger.getLogger(HandlenetServiceBean.class.getCanonicalName()); +public class HandlePidProvider extends AbstractPidProvider { + + private static final Logger logger = Logger.getLogger(HandlePidProvider.class.getCanonicalName()); public static final String HDL_PROTOCOL = "hdl"; - int handlenetIndex = JvmSettings.HANDLENET_INDEX.lookup(Integer.class); + public static final String TYPE = "hdl"; public static final String HTTP_HDL_RESOLVER_URL = "http://hdl.handle.net/"; public static final String HDL_RESOLVER_URL = "https://hdl.handle.net/"; + + - public HandlenetServiceBean() { - logger.log(Level.FINE,"Constructor"); - configured = true; + int handlenetIndex; + private boolean isIndependentHandleService; + private String authHandle; + private String keyPath; + private String keyPassphrase; + + public HandlePidProvider(String id, String label, String authority, String shoulder, String identifierGenerationStyle, + String datafilePidFormat, String managedList, String excludedList, int index, boolean isIndependentService, String authHandle, String path, String passphrase) { + super(id, label, HDL_PROTOCOL, authority, shoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList); + this.handlenetIndex = index; + this.isIndependentHandleService = isIndependentService; + this.authHandle = authHandle; + this.keyPath = path; + this.keyPassphrase = passphrase; + } @Override @@ -231,10 +240,9 @@ private ResolutionRequest buildResolutionRequest(final String handle) { private PublicKeyAuthenticationInfo getAuthInfo(String handlePrefix) { logger.log(Level.FINE,"getAuthInfo"); byte[] key = null; - String adminCredFile = JvmSettings.HANDLENET_KEY_PATH.lookup(); - int handlenetIndex = JvmSettings.HANDLENET_INDEX.lookup(Integer.class); + String adminCredFile = getKeyPath(); - key = readKey(adminCredFile); + key = readKey(adminCredFile); PrivateKey privkey = null; privkey = readPrivKey(key, adminCredFile); String authHandle = getAuthenticationHandle(handlePrefix); @@ -244,8 +252,8 @@ private PublicKeyAuthenticationInfo getAuthInfo(String handlePrefix) { } private String getRegistrationUrl(DvObject dvObject) { logger.log(Level.FINE,"getRegistrationUrl"); - String siteUrl = systemConfig.getDataverseSiteUrl(); - String targetUrl = siteUrl + dvObject.getTargetUrl() + "hdl:" + dvObject.getAuthority() + String siteUrl = SystemConfig.getDataverseSiteUrlStatic(); + String targetUrl = siteUrl + dvObject.getTargetUrl() + "hdl:" + dvObject.getAuthority() + "/" + 
dvObject.getIdentifier(); return targetUrl; } @@ -278,8 +286,7 @@ private PrivateKey readPrivKey(byte[] key, final String file) { try { byte[] secKey = null; if ( Util.requiresSecretKey(key) ) { - String secret = JvmSettings.HANDLENET_KEY_PASSPHRASE.lookup(); - secKey = secret.getBytes(StandardCharsets.UTF_8); + secKey = getKeyPassphrase().getBytes(StandardCharsets.UTF_8); } key = Util.decrypt(key, secKey); privkey = Util.getPrivateKeyFromBytes(key, 0); @@ -304,9 +311,9 @@ private String getAuthenticationHandle(DvObject dvObject){ private String getAuthenticationHandle(String handlePrefix) { logger.log(Level.FINE,"getAuthenticationHandle"); - if (systemConfig.getHandleAuthHandle()!=null) { - return systemConfig.getHandleAuthHandle(); - } else if (systemConfig.isIndependentHandleService()) { + if (getHandleAuthHandle()!=null) { + return getHandleAuthHandle(); + } else if (isIndependentHandleService()) { return handlePrefix + "/ADMIN"; } else { return "0.NA/" + handlePrefix; @@ -348,9 +355,8 @@ public void deleteIdentifier(DvObject dvObject) throws Exception { String handle = getDvObjectHandle(dvObject); String authHandle = getAuthenticationHandle(dvObject); - String adminCredFile = JvmSettings.HANDLENET_KEY_PATH.lookup(); - int handlenetIndex = JvmSettings.HANDLENET_INDEX.lookup(Integer.class); - + String adminCredFile = getKeyPath(); + byte[] key = readKey(adminCredFile); PrivateKey privkey = readPrivKey(key, adminCredFile); @@ -383,7 +389,7 @@ private boolean updateIdentifierStatus(DvObject dvObject, String statusIn) { @Override public List getProviderInformation(){ - return List.of("Handle", "https://hdl.handle.net"); + return List.of(getId(), HDL_RESOLVER_URL); } @@ -401,7 +407,7 @@ public String createIdentifier(DvObject dvObject) throws Throwable { @Override public boolean publicizeIdentifier(DvObject dvObject) { if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()){ - generateIdentifier(dvObject); + generatePid(dvObject); } return updateIdentifierStatus(dvObject, "public"); @@ -438,6 +444,32 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden public String getUrlPrefix() { return HDL_RESOLVER_URL; } + + @Override + public String getProtocol() { + return HDL_PROTOCOL; + } + + @Override + public String getProviderType() { + return TYPE; + } + + public String getKeyPath() { + return keyPath; + } + + public String getKeyPassphrase() { + return keyPassphrase; + } + + public boolean isIndependentHandleService() { + return isIndependentHandleService; + } + + public String getHandleAuthHandle() { + return authHandle; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandleProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandleProviderFactory.java new file mode 100644 index 00000000000..eef5bed8432 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandleProviderFactory.java @@ -0,0 +1,45 @@ +package edu.harvard.iq.dataverse.pidproviders.handle; + +import com.google.auto.service.AutoService; + +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +@AutoService(PidProviderFactory.class) +public class HandleProviderFactory implements PidProviderFactory { + + @Override + public PidProvider createPidProvider(String providerId) { + String providerType = 
JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + if (!providerType.equals(HandlePidProvider.TYPE)) { + // Being asked to create a non-EZId provider + return null; + } + String providerLabel = JvmSettings.PID_PROVIDER_LABEL.lookup(providerId); + String providerAuthority = JvmSettings.PID_PROVIDER_AUTHORITY.lookup(providerId); + String providerShoulder = JvmSettings.PID_PROVIDER_SHOULDER.lookupOptional(providerId).orElse(""); + String identifierGenerationStyle = JvmSettings.PID_PROVIDER_IDENTIFIER_GENERATION_STYLE + .lookupOptional(providerId).orElse("randomString"); + String datafilePidFormat = JvmSettings.PID_PROVIDER_DATAFILE_PID_FORMAT.lookupOptional(providerId) + .orElse(SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); + String managedList = JvmSettings.PID_PROVIDER_MANAGED_LIST.lookupOptional(providerId).orElse(""); + String excludedList = JvmSettings.PID_PROVIDER_EXCLUDED_LIST.lookupOptional(providerId).orElse(""); + + int index = JvmSettings.HANDLENET_INDEX.lookupOptional(Integer.class, providerId).orElse(300); + boolean independentHandleService = JvmSettings.HANDLENET_INDEPENDENT_SERVICE + .lookupOptional(Boolean.class, providerId).orElse(false); + String handleAuthHandle = JvmSettings.HANDLENET_AUTH_HANDLE.lookup(providerId); + String path = JvmSettings.HANDLENET_KEY_PATH.lookup(providerId); + String passphrase = JvmSettings.HANDLENET_KEY_PASSPHRASE.lookup(providerId); + return new HandlePidProvider(providerId, providerLabel, providerAuthority, providerShoulder, identifierGenerationStyle, + datafilePidFormat, managedList, excludedList, index, independentHandleService, handleAuthHandle, path, + passphrase); + } + + public String getType() { + return HandlePidProvider.TYPE; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedHandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/UnmanagedHandlePidProvider.java similarity index 61% rename from src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedHandlenetServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/UnmanagedHandlePidProvider.java index c856c5363e0..075e10d8164 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedHandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/UnmanagedHandlePidProvider.java @@ -1,27 +1,32 @@ -package edu.harvard.iq.dataverse.pidproviders; +package edu.harvard.iq.dataverse.pidproviders.handle; -import edu.harvard.iq.dataverse.AbstractGlobalIdServiceBean; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.HandlenetServiceBean; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; + import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; import jakarta.ejb.Stateless; import org.apache.commons.lang3.NotImplementedException; -/** This class is just used to parse Handles that are not managed by any account configured in Dataverse - * It does not implement any of the methods related to PID CRUD +/** + * This class is just used to parse Handles that are not managed by any account + * configured in Dataverse It does not implement any of the methods related to + * PID CRUD * */ -@Stateless -public class UnmanagedHandlenetServiceBean extends AbstractGlobalIdServiceBean { - private static final Logger logger = Logger.getLogger(UnmanagedHandlenetServiceBean.class.getCanonicalName()); +public class UnmanagedHandlePidProvider extends 
AbstractPidProvider { + + private static final Logger logger = Logger.getLogger(UnmanagedHandlePidProvider.class.getCanonicalName()); + public static final String ID = "UnmanagedHandleProvider"; - public UnmanagedHandlenetServiceBean() { + public UnmanagedHandlePidProvider() { + // Also using ID as label + super(ID, ID, HandlePidProvider.HDL_PROTOCOL); logger.log(Level.FINE, "Constructor"); - configured = true; + } @Override @@ -56,7 +61,7 @@ public void deleteIdentifier(DvObject dvObject) throws Exception { @Override public List getProviderInformation() { - return List.of("UnmanagedHandle", ""); + return List.of(getId(), ""); } @Override @@ -71,19 +76,18 @@ public boolean publicizeIdentifier(DvObject dvObject) { @Override public GlobalId parsePersistentId(String pidString) { - if (pidString.startsWith(HandlenetServiceBean.HDL_RESOLVER_URL)) { - pidString = pidString.replace(HandlenetServiceBean.HDL_RESOLVER_URL, - (HandlenetServiceBean.HDL_PROTOCOL + ":")); - } else if (pidString.startsWith(HandlenetServiceBean.HTTP_HDL_RESOLVER_URL)) { - pidString = pidString.replace(HandlenetServiceBean.HTTP_HDL_RESOLVER_URL, - (HandlenetServiceBean.HDL_PROTOCOL + ":")); + if (pidString.startsWith(HandlePidProvider.HDL_RESOLVER_URL)) { + pidString = pidString.replace(HandlePidProvider.HDL_RESOLVER_URL, (HandlePidProvider.HDL_PROTOCOL + ":")); + } else if (pidString.startsWith(HandlePidProvider.HTTP_HDL_RESOLVER_URL)) { + pidString = pidString.replace(HandlePidProvider.HTTP_HDL_RESOLVER_URL, + (HandlePidProvider.HDL_PROTOCOL + ":")); } return super.parsePersistentId(pidString); } @Override public GlobalId parsePersistentId(String protocol, String identifierString) { - if (!HandlenetServiceBean.HDL_PROTOCOL.equals(protocol)) { + if (!HandlePidProvider.HDL_PROTOCOL.equals(protocol)) { return null; } GlobalId globalId = super.parsePersistentId(protocol, identifierString); @@ -92,7 +96,7 @@ public GlobalId parsePersistentId(String protocol, String identifierString) { @Override public GlobalId parsePersistentId(String protocol, String authority, String identifier) { - if (!HandlenetServiceBean.HDL_PROTOCOL.equals(protocol)) { + if (!HandlePidProvider.HDL_PROTOCOL.equals(protocol)) { return null; } return super.parsePersistentId(protocol, authority, identifier); @@ -100,6 +104,11 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden @Override public String getUrlPrefix() { - return HandlenetServiceBean.HDL_RESOLVER_URL; + return HandlePidProvider.HDL_RESOLVER_URL; + } + + @Override + public String getProviderType() { + return "unamagedHandle"; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java new file mode 100644 index 00000000000..91c7f527c88 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java @@ -0,0 +1,201 @@ +package edu.harvard.iq.dataverse.pidproviders.perma; + +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +/** + * PermaLink provider This is a minimalist permanent ID provider intended for + * use with 'real' 
datasets/files where the use case none-the-less doesn't lend + * itself to the use of DOIs or Handles, e.g. * due to cost * for a + * catalog/archive where Dataverse has a dataset representing a dataset with + * DOI/handle stored elsewhere + * + * The initial implementation will mint identifiers locally and will provide the + * existing page URLs (using the ?persistentID= format). This will be + * overridable by a configurable parameter to support use of an external + * resolver. + * + */ +public class PermaLinkPidProvider extends AbstractPidProvider { + + private static final Logger logger = Logger.getLogger(PermaLinkPidProvider.class.getCanonicalName()); + + public static final String PERMA_PROTOCOL = "perma"; + public static final String TYPE = "perma"; + public static final String SEPARATOR = ""; + + // ToDo - remove + @Deprecated + public static final String PERMA_RESOLVER_URL = JvmSettings.PERMALINK_BASE_URL.lookupOptional("permalink") + .orElse(SystemConfig.getDataverseSiteUrlStatic()); + + + private String separator = SEPARATOR; + + private String baseUrl; + + public PermaLinkPidProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, + String datafilePidFormat, String managedList, String excludedList, String baseUrl, String separator) { + super(id, label, PERMA_PROTOCOL, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, + managedList, excludedList); + this.baseUrl = baseUrl; + this.separator = separator; + } + + @Override + public String getSeparator() { + return separator; + } + + @Override + public boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) { + // Perma doesn't manage registration, so we assume all local PIDs can be treated + // as registered + boolean existsLocally = !pidProviderService.isGlobalIdLocallyUnique(globalId); + return existsLocally ? existsLocally : noProviderDefault; + } + + @Override + public boolean registerWhenPublished() { + return false; + } + + @Override + public List getProviderInformation() { + return List.of(getId(), getBaseUrl()); + } + + @Override + public String createIdentifier(DvObject dvo) throws Throwable { + // Call external resolver and send landing URL? + // FWIW: Return value appears to only be used in RegisterDvObjectCommand where + // success requires finding the dvo identifier in this string. (Also logged a + // couple places). + return (dvo.getGlobalId().asString()); + } + + @Override + public Map getIdentifierMetadata(DvObject dvo) { + Map map = new HashMap<>(); + return map; + } + + @Override + public String modifyIdentifierTargetURL(DvObject dvo) throws Exception { + return getTargetUrl(dvo); + } + + @Override + public void deleteIdentifier(DvObject dvo) throws Exception { + // no-op + } + + @Override + public boolean publicizeIdentifier(DvObject dvObject) { + // Generate if needed (i.e. datafile case where we don't create/register early + // (even with registerWhenPublished == false)) + if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()) { + dvObject = generatePid(dvObject); + } + // Call external resolver and send landing URL? 
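As the class comment explains, permalinks are resolved by the installation itself via the ?persistentId= query parameter (see getUrlPrefix() further down). A small illustration with made-up site URL, authority and identifier values:

class PermaLinkUrlSketch {
    static String exampleCitationUrl() {
        // All values are illustrative.
        String siteUrl   = "https://demo.example.edu";    // base URL of the installation
        String authority = "DV12";                        // permalink authority (no registration agency)
        String separator = "";                            // permalinks default to an empty separator
        String localId   = "ABCDE";                       // shoulder + locally minted identifier

        String pid = "perma:" + authority + separator + localId;      // perma:DV12ABCDE
        return siteUrl + "/citation?persistentId=" + pid;             // matches getUrlPrefix()
    }
}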
+ return true; + } + + @Override + public GlobalId parsePersistentId(String pidString) { + // ToDo - handle local PID resolver for dataset/file + logger.info("Parsing in Perma: " + pidString); + if (pidString.startsWith(getUrlPrefix())) { + pidString = pidString.replace(getUrlPrefix(), (PERMA_PROTOCOL + ":")); + } + return super.parsePersistentId(pidString); + } + + @Override + public GlobalId parsePersistentId(String protocol, String identifierString) { + logger.info("Checking Perma: " + identifierString); + if (!PERMA_PROTOCOL.equals(protocol)) { + return null; + } + String cleanIdentifier = PERMA_PROTOCOL + ":" + identifierString; + // With permalinks, we have to check the sets before parsing since the permalinks in these sets could have different authority, spearator, and shoulders + if (getExcludedSet().contains(cleanIdentifier)) { + return null; + } + if(getManagedSet().contains(cleanIdentifier)) { + /** With a variable separator that could also be empty, there is no way to determine the authority and shoulder for an unknown permalink. + * Since knowing this split isn't relevant for permalinks except for minting, the code below just assumes the authority + * is the first 4 characters and that the separator and the shoulder are empty. + * If this is found to cause issues, users should be able to use a managed permalink provider as a work-around. The code here could + * be changed to allow default lengths for the authority, separator, and shoulder and/or to add a list of known (but unmanaged) authority, separator, shoulder combos. + */ + if(identifierString.length() < 4) { + return new GlobalId(protocol, "", identifierString, SEPARATOR, getUrlPrefix(), + getId()); + } + return new GlobalId(protocol, identifierString.substring(0,4), identifierString.substring(4), SEPARATOR, getUrlPrefix(), + getId()); + } + String identifier = null; + if (getAuthority() != null) { + if (identifierString.startsWith(getAuthority())) { + identifier = identifierString.substring(getAuthority().length()); + } else { + //Doesn't match authority + return null; + } + if (identifier.startsWith(separator)) { + identifier = identifier.substring(separator.length()); + } else { + //Doesn't match separator + return null; + } + } + identifier = PidProvider.formatIdentifierString(identifier); + if (PidProvider.testforNullTerminator(identifier)) { + return null; + } + if(!identifier.startsWith(getShoulder())) { + //Doesn't match shoulder + return null; + } + return new GlobalId(PERMA_PROTOCOL, getAuthority(), identifier, separator, getUrlPrefix(), getId()); + } + + @Override + public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + if (!PERMA_PROTOCOL.equals(protocol)) { + return null; + } + return super.parsePersistentId(protocol, authority, identifier); + } + + @Override + public String getUrlPrefix() { + + return getBaseUrl() + "/citation?persistentId=" + PERMA_PROTOCOL + ":"; + } + + @Override + public String getProtocol() { + return PERMA_PROTOCOL; + } + + @Override + public String getProviderType() { + return PERMA_PROTOCOL; + } + + public String getBaseUrl() { + return baseUrl; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkProviderFactory.java new file mode 100644 index 00000000000..32b89223062 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkProviderFactory.java @@ -0,0 +1,43 @@ +package 
edu.harvard.iq.dataverse.pidproviders.perma; + +import com.google.auto.service.AutoService; + +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +@AutoService(PidProviderFactory.class) +public class PermaLinkProviderFactory implements PidProviderFactory { + + @Override + public PidProvider createPidProvider(String providerId) { + String providerType = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + if (!providerType.equals(PermaLinkPidProvider.TYPE)) { + // Being asked to create a non-permalink provider + return null; + } + String providerLabel = JvmSettings.PID_PROVIDER_LABEL.lookup(providerId); + String providerAuthority = JvmSettings.PID_PROVIDER_AUTHORITY.lookup(providerId); + String providerShoulder = JvmSettings.PID_PROVIDER_SHOULDER.lookupOptional(providerId).orElse(""); + String identifierGenerationStyle = JvmSettings.PID_PROVIDER_IDENTIFIER_GENERATION_STYLE + .lookupOptional(providerId).orElse("randomString"); + String datafilePidFormat = JvmSettings.PID_PROVIDER_DATAFILE_PID_FORMAT.lookupOptional(providerId) + .orElse(SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); + String managedList = JvmSettings.PID_PROVIDER_MANAGED_LIST.lookupOptional(providerId).orElse(""); + String excludedList = JvmSettings.PID_PROVIDER_EXCLUDED_LIST.lookupOptional(providerId).orElse(""); + + String baseUrl = JvmSettings.PERMALINK_BASE_URL.lookupOptional(providerId) + .orElse(SystemConfig.getDataverseSiteUrlStatic()); + String separator = JvmSettings.PERMALINK_SEPARATOR.lookupOptional(providerId).orElse(""); + + return new PermaLinkPidProvider(providerId, providerLabel, providerAuthority, providerShoulder, identifierGenerationStyle, + datafilePidFormat, managedList, excludedList, baseUrl, separator); + } + + public String getType() { + return PermaLinkPidProvider.TYPE; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/UnmanagedPermaLinkPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/UnmanagedPermaLinkPidProvider.java new file mode 100644 index 00000000000..b7961a41c50 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/UnmanagedPermaLinkPidProvider.java @@ -0,0 +1,114 @@ +package edu.harvard.iq.dataverse.pidproviders.perma; + +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.util.SystemConfig; + +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; +import jakarta.ejb.Stateless; +import org.apache.commons.lang3.NotImplementedException; + +/** This class is just used to parse PermaLinks that are not managed by any account configured in Dataverse. + * It does not implement any of the methods related to PID CRUD. + * + */ +public class UnmanagedPermaLinkPidProvider extends AbstractPidProvider { + + private static final Logger logger = Logger.getLogger(UnmanagedPermaLinkPidProvider.class.getCanonicalName()); + public static final String ID = "UnmanagedPermaLinkProvider"; + + public UnmanagedPermaLinkPidProvider() { + // Also using ID as label + super(ID, ID, PermaLinkPidProvider.PERMA_PROTOCOL); + logger.log(Level.FINE, "Constructor"); + } + + @Override + public boolean canManagePID() { + return false; + } + + @Override + public boolean registerWhenPublished() { + throw new
NotImplementedException(); + } + + @Override + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) throws Exception { + throw new NotImplementedException(); + } + + @Override + public Map getIdentifierMetadata(DvObject dvObject) { + throw new NotImplementedException(); + } + + @Override + public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { + throw new NotImplementedException(); + } + + @Override + public void deleteIdentifier(DvObject dvObject) throws Exception { + throw new NotImplementedException(); + } + + @Override + public List getProviderInformation() { + return List.of(getId(), ""); + } + + @Override + public String createIdentifier(DvObject dvObject) throws Throwable { + throw new NotImplementedException(); + } + + @Override + public boolean publicizeIdentifier(DvObject dvObject) { + throw new NotImplementedException(); + } + + @Override + public GlobalId parsePersistentId(String protocol, String identifierString) { + if (!PermaLinkPidProvider.PERMA_PROTOCOL.equals(protocol)) { + return null; + } + /** With a variable separator that could also be empty, there is no way to determine the authority and shoulder for an unknown/unmanaged permalink. + * Since knowing this split isn't relevant for unmanaged permalinks, the code below just assumes the authority + * is the first 4 characters and that the separator and the shoulder are empty. + * If this is found to cause issues, users should be able to use a managed permalink provider as a work-around. The code here could + * be changed to allow default lengths for the authority, separator, and shoulder and/or to add a list of known (but unmanaged) authority, separator, shoulder combos. + */ + if(identifierString.length() < 4) { + logger.warning("A short unmanaged permalink was found - assuming the authority is empty: " + identifierString); + return super.parsePersistentId(protocol, "", identifierString); + } + return super.parsePersistentId(protocol, identifierString.substring(0, 4), identifierString.substring(4)); + } + + @Override + public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + if (!PermaLinkPidProvider.PERMA_PROTOCOL.equals(protocol)) { + return null; + } + return super.parsePersistentId(protocol, authority, identifier); + } + + @Override + public String getUrlPrefix() { + return SystemConfig.getDataverseSiteUrlStatic()+ "/citation?persistentId=" + PermaLinkPidProvider.PERMA_PROTOCOL + ":"; + } + + @Override + public String getProviderType() { + return PermaLinkPidProvider.TYPE; + } + + @Override + public String getSeparator() { + return PermaLinkPidProvider.SEPARATOR; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index d6d0be7a17b..5716b39e85c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1,27 +1,6 @@ package edu.harvard.iq.dataverse.search; -import edu.harvard.iq.dataverse.ControlledVocabularyValue; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.DataFileTag; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; -import edu.harvard.iq.dataverse.DatasetFieldConstant; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import 
edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.DvObjectServiceBean; -import edu.harvard.iq.dataverse.Embargo; -import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.PermissionServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; @@ -420,6 +399,7 @@ public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { } } + @Asynchronous public void asyncIndexDatasetList(List datasets, boolean doNormalSolrDocCleanUp) { for(Dataset dataset : datasets) { asyncIndexDataset(dataset, true); @@ -803,6 +783,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set getDataversePathsFromSegments(List dataversePathSeg return subtrees; } + private void addLicenseToSolrDoc(SolrInputDocument solrInputDocument, DatasetVersion datasetVersion) { + if (datasetVersion != null && datasetVersion.getTermsOfUseAndAccess() != null) { + String licenseName = "Custom Terms"; + if(datasetVersion.getTermsOfUseAndAccess().getLicense() != null) { + licenseName = datasetVersion.getTermsOfUseAndAccess().getLicense().getName(); + } + solrInputDocument.addField(SearchFields.DATASET_LICENSE, licenseName); + } + } + private void addDataverseReleaseDateToSolrDoc(SolrInputDocument solrInputDocument, Dataverse dataverse) { if (dataverse.getPublicationDate() != null) { Calendar calendar = Calendar.getInstance(); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java index 8fb7c161517..399ca0340e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java @@ -276,4 +276,6 @@ more targeted results for just datasets. The format is YYYY (i.e. 
public static final String DATASET_VALID = "datasetValid"; + public static final String DATASET_LICENSE = "license"; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 2ce06541afa..520aa36a12c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -22,6 +22,7 @@ import edu.harvard.iq.dataverse.ThumbnailServiceWrapper; import edu.harvard.iq.dataverse.WidgetWrapper; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import java.time.LocalDate; import java.util.ArrayList; @@ -120,7 +121,6 @@ public class SearchIncludeFragment implements java.io.Serializable { private Long facetCountDatasets = 0L; private Long facetCountFiles = 0L; Map previewCountbyType = new HashMap<>(); - private SolrQueryResponse solrQueryResponseAllTypes; private String sortField; private SortOrder sortOrder; private String currentSort; @@ -132,6 +132,7 @@ public class SearchIncludeFragment implements java.io.Serializable { Map datasetfieldFriendlyNamesBySolrField = new HashMap<>(); Map staticSolrFieldFriendlyNamesBySolrField = new HashMap<>(); private boolean solrIsDown = false; + private boolean solrIsTemporarilyUnavailable = false; private Map numberOfFacets = new HashMap<>(); // private boolean showUnpublished; List filterQueriesDebug = new ArrayList<>(); @@ -279,8 +280,9 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused SolrQueryResponse solrQueryResponse = null; + SolrQueryResponse solrQueryResponseSecondPass = null; - List filterQueriesFinal = new ArrayList<>(); + List filterQueriesExtended = new ArrayList<>(); if (dataverseAlias != null) { this.dataverse = dataverseService.findByAlias(dataverseAlias); @@ -294,7 +296,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused * @todo centralize this into SearchServiceBean */ if (!isfilterQueryAlreadyInMap(filterDownToSubtree)){ - filterQueriesFinal.add(filterDownToSubtree); + filterQueriesExtended.add(filterDownToSubtree); } // this.dataverseSubtreeContext = dataversePath; } else { @@ -307,22 +309,23 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused this.setRootDv(true); } + filterQueriesExtended.addAll(filterQueries); + + /** + * Add type queries, for the types (Dataverses, Datasets, Datafiles) + * currently selected: + */ selectedTypesList = new ArrayList<>(); String[] parts = selectedTypesString.split(":"); selectedTypesList.addAll(Arrays.asList(parts)); - - List filterQueriesFinalAllTypes = new ArrayList<>(); + String[] arr = selectedTypesList.toArray(new String[selectedTypesList.size()]); selectedTypesHumanReadable = combine(arr, " OR "); if (!selectedTypesHumanReadable.isEmpty()) { typeFilterQuery = SearchFields.TYPE + ":(" + selectedTypesHumanReadable + ")"; - } - - filterQueriesFinal.addAll(filterQueries); - filterQueriesFinalAllTypes.addAll(filterQueriesFinal); - - String allTypesFilterQuery = SearchFields.TYPE + ":(dataverses OR datasets OR files)"; - filterQueriesFinalAllTypes.add(allTypesFilterQuery); + } + List filterQueriesFinal = new ArrayList<>(); + filterQueriesFinal.addAll(filterQueriesExtended); filterQueriesFinal.add(typeFilterQuery); if (page <= 1) { @@ -344,6 +347,7 @@ The real issue here 
(https://github.com/IQSS/dataverse/issues/7304) is caused try { logger.fine("ATTENTION! query from user: " + query); logger.fine("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); + logger.fine("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); logger.fine("ATTENTION! sort by: " + sortField); /** @@ -355,18 +359,79 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused DataverseRequest dataverseRequest = new DataverseRequest(session.getUser(), httpServletRequest); List dataverses = new ArrayList<>(); dataverses.add(dataverse); - solrQueryResponse = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinal, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); + solrQueryResponse = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinal, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null, !isFacetsDisabled(), true); if (solrQueryResponse.hasError()){ logger.info(solrQueryResponse.getError()); setSolrErrorEncountered(true); + } + // Solr "temporarily unavailable" is the condition triggered by + // receiving a 503 from the search engine, that is in turn a result + // of one of the Solr "circuit breakers" being triggered by excessive + // load. We treat this condition as distinct from "Solr is down", + // on the assumption that it is transitive. + if (solrQueryResponse.isSolrTemporarilyUnavailable()) { + setSolrTemporarilyUnavailable(true); } // This 2nd search() is for populating the "type" ("dataverse", "dataset", "file") facets: -- L.A. // (why exactly do we need it, again?) // To get the counts we display in the types facets particulary for unselected types - SEK 08/25/2021 - solrQueryResponseAllTypes = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalAllTypes, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); - if (solrQueryResponse.hasError()){ - logger.info(solrQueryResponse.getError()); - setSolrErrorEncountered(true); + // Sure, but we should not waste resources here. We will try to save + // solr some extra work and a) only run this second query IF there is + // one or more unselected type facets; and b) drop all the extra + // parameters from this second query - such as facets and highlights - + // that we do not actually need for the purposes of finding these + // extra numbers. -- L.A. 
10/16/2023 + + // populate preview counts: https://redmine.hmdc.harvard.edu/issues/3560 + previewCountbyType.put(BundleUtil.getStringFromBundle("dataverses"), 0L); + previewCountbyType.put(BundleUtil.getStringFromBundle("datasets"), 0L); + previewCountbyType.put(BundleUtil.getStringFromBundle("files"), 0L); + + + // This will populate the type facet counts for the types that are + // currently selected on the collection page: + for (FacetCategory facetCategory : solrQueryResponse.getTypeFacetCategories()) { + for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); + } + } + + if (!wasSolrErrorEncountered() && selectedTypesList.size() < 3 && !isSolrTemporarilyUnavailable() && !isFacetsDisabled()) { + // If some types are NOT currently selected, we will need to + // run a second search to obtain the numbers of the unselected types: + + List filterQueriesFinalSecondPass = new ArrayList<>(); + filterQueriesFinalSecondPass.addAll(filterQueriesExtended); + + arr = new String[3]; + int c = 0; + for (String dvObjectType : Arrays.asList("dataverses", "datasets", "files")) { + if (!selectedTypesList.contains(dvObjectType)) { + arr[c++] = dvObjectType; + } + } + filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ", c) + ")"); + logger.fine("second pass query: " + queryToPassToSolr); + logger.fine("second pass filter query: "+filterQueriesFinalSecondPass.toString()); + + solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, null, sortOrder.toString(), 0, onlyDataRelatedToMe, 1, false, null, null, false, false); + + if (solrQueryResponseSecondPass != null) { + + if (solrQueryResponseSecondPass.hasError()) { + logger.fine(solrQueryResponseSecondPass.getError()); + setSolrErrorEncountered(true); + } + + // And now populate the remaining type facets: + for (FacetCategory facetCategory : solrQueryResponseSecondPass.getTypeFacetCategories()) { + for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); + } + } + } else { + logger.warning("null solr response from the second pass type query"); + } } } catch (SearchException ex) { @@ -446,17 +511,6 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } - // populate preview counts: https://redmine.hmdc.harvard.edu/issues/3560 - previewCountbyType.put(BundleUtil.getStringFromBundle("dataverses"), 0L); - previewCountbyType.put(BundleUtil.getStringFromBundle("datasets"), 0L); - previewCountbyType.put(BundleUtil.getStringFromBundle("files"), 0L); - if (solrQueryResponseAllTypes != null) { - for (FacetCategory facetCategory : solrQueryResponseAllTypes.getTypeFacetCategories()) { - for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { - previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); - } - } - } setDisplayCardValues(); @@ -606,6 +660,10 @@ public void incrementFacets(String name, int incrementNum) { // http://stackoverflow.com/questions/1515437/java-function-for-arrays-like-phps-join/1515548#1515548 String combine(String[] s, String glue) { int k = s.length; + return combine(s, glue, k); + } + + String combine(String[] s, String glue, int k) { if (k == 0) { return null; } @@ -1020,7 +1078,29 @@ public boolean isSolrIsDown() { public void setSolrIsDown(boolean solrIsDown) { this.solrIsDown = solrIsDown; } + + public boolean isSolrTemporarilyUnavailable() { + 
return solrIsTemporarilyUnavailable; + } + + public void setSolrTemporarilyUnavailable(boolean solrIsTemporarilyUnavailable) { + this.solrIsTemporarilyUnavailable = solrIsTemporarilyUnavailable; + } + /** + * Indicates that the fragment should not be requesting facets in Solr + * searches and rendering them on the page. + * @return true if disabled; false by default + */ + public boolean isFacetsDisabled() { + // The method is used in rendered="..." logic. So we are using + // SettingsWrapper to make sure we are not looking it up repeatedly + // (settings are not expensive to look up, but + // still). + + return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacets, false); + } + public boolean isRootDv() { return rootDv; } @@ -1163,6 +1243,12 @@ public List getFriendlyNamesFromFilterQuery(String filterQuery) { friendlyNames.add(friendlyName.get()); return friendlyNames; } + } else if (key.equals(SearchFields.DATASET_LICENSE)) { + try { + friendlyNames.add(BundleUtil.getStringFromPropertyFile("license." + valueWithoutQuotes.toLowerCase().replace(" ","_") + ".name", "License")); + } catch (Exception e) { + logger.fine(String.format("action=getFriendlyNamesFromFilterQuery cannot find friendlyName for key=%s value=%s", key, value)); + } } friendlyNames.add(valueWithoutQuotes); @@ -1300,7 +1386,7 @@ public void setDisplayCardValues() { result.setImageUrl(thumbnailServiceWrapper.getDataverseCardImageAsBase64Url(result)); } else if (result.getType().equals("datasets")) { if (result.getEntity() != null) { - result.setImageUrl(thumbnailServiceWrapper.getDatasetCardImageAsBase64Url(result)); + result.setImageUrl(thumbnailServiceWrapper.getDatasetCardImageAsUrl(result)); } if (result.isHarvested()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 44976d232c2..0b93c617c1a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -101,7 +101,7 @@ public class SearchServiceBean { public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, String query, List filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException { return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true, null, null); } - + /** * Import note: "onlyDatatRelatedToMe" relies on filterQueries for providing * access to Private Data for the correct user @@ -122,6 +122,41 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, + String query, + List filterQueries, + String sortField, + String sortOrder, + int paginationStart, + boolean onlyDatatRelatedToMe, + int numResultsPerPage, + boolean retrieveEntities, + String geoPoint, + String geoRadius) throws SearchException { + return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true, null, null, true, true); + } + + /** + * @param dataverseRequest + * @param dataverses + * @param query + * @param filterQueries + * @param sortField + * @param sortOrder + * @param paginationStart + * @param onlyDatatRelatedToMe + * @param numResultsPerPage + * @param retrieveEntities - look up dvobject entities with .find() (potentially expensive!) 
+ * @param geoPoint e.g. "35,15" + * @param geoRadius e.g. "5" + * @param addFacets boolean + * @param addHighlights boolean * @return * @throws SearchException */ @@ -130,13 +165,16 @@ public SolrQueryResponse search( List dataverses, String query, List filterQueries, - String sortField, String sortOrder, + String sortField, + String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage, boolean retrieveEntities, String geoPoint, - String geoRadius + String geoRadius, + boolean addFacets, + boolean addHighlights ) throws SearchException { if (paginationStart < 0) { @@ -152,68 +190,27 @@ public SolrQueryResponse search( // SortClause foo = new SortClause("name", SolrQuery.ORDER.desc); // if (query.equals("*") || query.equals("*:*")) { // solrQuery.setSort(new SortClause(SearchFields.NAME_SORT, SolrQuery.ORDER.asc)); - solrQuery.setSort(new SortClause(sortField, sortOrder)); + if (sortField != null) { + // is it ok not to specify any sort? - there are cases where we + // don't care, and it must cost some extra cycles -- L.A. + solrQuery.setSort(new SortClause(sortField, sortOrder)); + } // } else { // solrQuery.setSort(sortClause); // } // solrQuery.setSort(sortClause); - solrQuery.setHighlight(true).setHighlightSnippets(1); - Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); - if (fragSize != null) { - solrQuery.setHighlightFragsize(fragSize); - } - solrQuery.setHighlightSimplePre(""); - solrQuery.setHighlightSimplePost(""); - Map solrFieldsToHightlightOnMap = new HashMap<>(); - // TODO: Do not hard code "Name" etc as English here. - solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); - solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); - solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); - solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); - - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); - /** - * @todo Dataverse subject and affiliation should be highlighted but - * this is commented out right now because the "friendly" names are not - * being shown on the dataverse cards. 
See also - * https://github.com/IQSS/dataverse/issues/1431 - */ -// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); -// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); - /** - * @todo: show highlight on file card? - * https://redmine.hmdc.harvard.edu/issues/3848 - */ - solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); - List datasetFields = datasetFieldService.findAllOrderedById(); - for (DatasetFieldType datasetFieldType : datasetFields) { - String solrField = datasetFieldType.getSolrField().getNameSearchable(); - String displayName = datasetFieldType.getDisplayName(); - solrFieldsToHightlightOnMap.put(solrField, displayName); - } - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String solrField = entry.getKey(); - // String displayName = entry.getValue(); - solrQuery.addHighlightField(solrField); - } + + solrQuery.setParam("fl", "*,score"); solrQuery.setParam("qt", "/select"); solrQuery.setParam("facet", "true"); + /** * @todo: do we need facet.query? */ solrQuery.setParam("facet.query", "*"); + solrQuery.addFacetField(SearchFields.TYPE); // this one is always performed + for (String filterQuery : filterQueries) { solrQuery.addFilterQuery(filterQuery); } @@ -223,70 +220,128 @@ public SolrQueryResponse search( // See https://solr.apache.org/guide/8_11/spatial-search.html#bbox solrQuery.addFilterQuery("{!bbox sfield=" + SearchFields.GEOLOCATION + "}"); } + + List metadataBlockFacets = new LinkedList<>(); - // ----------------------------------- - // Facets to Retrieve - // ----------------------------------- - solrQuery.addFacetField(SearchFields.METADATA_TYPES); -// solrQuery.addFacetField(SearchFields.HOST_DATAVERSE); -// solrQuery.addFacetField(SearchFields.AUTHOR_STRING); - solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); - solrQuery.addFacetField(SearchFields.METADATA_SOURCE); -// solrQuery.addFacetField(SearchFields.AFFILIATION); - solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR); -// solrQuery.addFacetField(SearchFields.CATEGORY); -// solrQuery.addFacetField(SearchFields.FILE_TYPE_MIME); -// solrQuery.addFacetField(SearchFields.DISTRIBUTOR); -// solrQuery.addFacetField(SearchFields.KEYWORD); - /** - * @todo when a new method on datasetFieldService is available - * (retrieveFacetsByDataverse?) only show the facets that the dataverse - * in question wants to show (and in the right order): - * https://redmine.hmdc.harvard.edu/issues/3490 - * - * also, findAll only returns advancedSearchField = true... we should - * probably introduce the "isFacetable" boolean rather than caring about - * if advancedSearchField is true or false - * - */ + if (addFacets) { + // ----------------------------------- + // Facets to Retrieve + // ----------------------------------- + solrQuery.addFacetField(SearchFields.METADATA_TYPES); + solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); + solrQuery.addFacetField(SearchFields.METADATA_SOURCE); + solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR); + solrQuery.addFacetField(SearchFields.DATASET_LICENSE); + /** + * @todo when a new method on datasetFieldService is available + * (retrieveFacetsByDataverse?) only show the facets that the + * dataverse in question wants to show (and in the right order): + * https://redmine.hmdc.harvard.edu/issues/3490 + * + * also, findAll only returns advancedSearchField = true... 
we + * should probably introduce the "isFacetable" boolean rather than + * caring about if advancedSearchField is true or false + * + */ + + if (dataverses != null) { + for (Dataverse dataverse : dataverses) { + if (dataverse != null) { + for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { + DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); + solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); + } + // Get all metadata block facets configured to be displayed + metadataBlockFacets.addAll(dataverse.getMetadataBlockFacets()); + } + } + } + + solrQuery.addFacetField(SearchFields.FILE_TYPE); + /** + * @todo: hide the extra line this shows in the GUI... at least it's + * last... + */ + solrQuery.addFacetField(SearchFields.FILE_TAG); + if (!systemConfig.isPublicInstall()) { + solrQuery.addFacetField(SearchFields.ACCESS); + } + } + + List datasetFields = datasetFieldService.findAllOrderedById(); + Map solrFieldsToHightlightOnMap = new HashMap<>(); + if (addHighlights) { + solrQuery.setHighlight(true).setHighlightSnippets(1); + Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); + if (fragSize != null) { + solrQuery.setHighlightFragsize(fragSize); + } + solrQuery.setHighlightSimplePre(""); + solrQuery.setHighlightSimplePost(""); + + // TODO: Do not hard code "Name" etc as English here. + solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); + solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); + solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); + solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); + + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); + /** + * @todo Dataverse subject and affiliation should be highlighted but + * this is commented out right now because the "friendly" names are + * not being shown on the dataverse cards. See also + * https://github.com/IQSS/dataverse/issues/1431 + */ +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); + /** + * @todo: show highlight on file card? 
+ * https://redmine.hmdc.harvard.edu/issues/3848 + */ + solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); + + for (DatasetFieldType datasetFieldType : datasetFields) { + String solrField = datasetFieldType.getSolrField().getNameSearchable(); + String displayName = datasetFieldType.getDisplayName(); + solrFieldsToHightlightOnMap.put(solrField, displayName); + } + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String solrField = entry.getKey(); + // String displayName = entry.getValue(); + solrQuery.addHighlightField(solrField); + } + } - List metadataBlockFacets = new LinkedList<>(); //I'm not sure if just adding null here is good for hte permissions system... i think it needs something if(dataverses != null) { for(Dataverse dataverse : dataverses) { // ----------------------------------- // PERMISSION FILTER QUERY // ----------------------------------- - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe); + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe, addFacets); if (permissionFilterQuery != null) { solrQuery.addFilterQuery(permissionFilterQuery); } - if (dataverse != null) { - for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { - DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); - solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); - } - // Get all metadata block facets configured to be displayed - metadataBlockFacets.addAll(dataverse.getMetadataBlockFacets()); - } } } else { - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe); + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe, addFacets); if (permissionFilterQuery != null) { solrQuery.addFilterQuery(permissionFilterQuery); } } - solrQuery.addFacetField(SearchFields.FILE_TYPE); - /** - * @todo: hide the extra line this shows in the GUI... at least it's - * last... - */ - solrQuery.addFacetField(SearchFields.TYPE); - solrQuery.addFacetField(SearchFields.FILE_TAG); - if (!systemConfig.isPublicInstall()) { - solrQuery.addFacetField(SearchFields.ACCESS); - } + /** * @todo: do sanity checking... throw error if negative */ @@ -328,10 +383,32 @@ public SolrQueryResponse search( // Make the solr query // ----------------------------------- QueryResponse queryResponse = null; + try { queryResponse = solrClientService.getSolrClient().query(solrQuery); + } catch (RemoteSolrException ex) { String messageFromSolr = ex.getLocalizedMessage(); + + logger.fine("message from the solr exception: "+messageFromSolr+"; code: "+ex.code()); + + SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(solrQuery); + + // We probably shouldn't be assuming that this is necessarily a + // "search syntax error", as the code below implies - could be + // something else too - ? 
+ + // Specifically, we now rely on the Solr "circuit breaker" mechanism + // to start dropping requests with 503, when the service is + // overwhelmed with requests load (with the assumption that this is + // a transient condition): + + if (ex.code() == 503) { + // actual logic for communicating this state back to the local + // client code TBD (@todo) + exceptionSolrQueryResponse.setSolrTemporarilyUnavailable(true); + } + String error = "Search Syntax Error: "; String stringToHide = "org.apache.solr.search.SyntaxError: "; if (messageFromSolr.startsWith(stringToHide)) { @@ -341,10 +418,10 @@ public SolrQueryResponse search( error += messageFromSolr; } logger.info(error); - SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(solrQuery); exceptionSolrQueryResponse.setError(error); // we can't show anything because of the search syntax error + long zeroNumResultsFound = 0; long zeroGetResultsStart = 0; List emptySolrSearchResults = new ArrayList<>(); @@ -360,6 +437,12 @@ public SolrQueryResponse search( } catch (SolrServerException | IOException ex) { throw new SearchException("Internal Dataverse Search Engine Error", ex); } + + int statusCode = queryResponse.getStatus(); + + logger.fine("status code of the query response: "+statusCode); + logger.fine("_size from query response: "+queryResponse._size()); + logger.fine("qtime: "+queryResponse.getQTime()); SolrDocumentList docs = queryResponse.getResults(); List solrSearchResults = new ArrayList<>(); @@ -416,34 +499,44 @@ public SolrQueryResponse search( Boolean datasetValid = (Boolean) solrDocument.getFieldValue(SearchFields.DATASET_VALID); List matchedFields = new ArrayList<>(); - List highlights = new ArrayList<>(); - Map highlightsMap = new HashMap<>(); - Map> highlightsMap2 = new HashMap<>(); - Map highlightsMap3 = new HashMap<>(); - if (queryResponse.getHighlighting().get(id) != null) { - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String field = entry.getKey(); - String displayName = entry.getValue(); - - List highlightSnippets = queryResponse.getHighlighting().get(id).get(field); - if (highlightSnippets != null) { - matchedFields.add(field); - /** - * @todo only SolrField.SolrType.STRING? that's not - * right... knit the SolrField object more into the - * highlighting stuff - */ - SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); - Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); - highlights.add(highlight); - highlightsMap.put(solrField, highlight); - highlightsMap2.put(solrField, highlightSnippets); - highlightsMap3.put(field, highlight); + + SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); + + if (addHighlights) { + List highlights = new ArrayList<>(); + Map highlightsMap = new HashMap<>(); + Map> highlightsMap2 = new HashMap<>(); + Map highlightsMap3 = new HashMap<>(); + if (queryResponse.getHighlighting().get(id) != null) { + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String field = entry.getKey(); + String displayName = entry.getValue(); + + List highlightSnippets = queryResponse.getHighlighting().get(id).get(field); + if (highlightSnippets != null) { + matchedFields.add(field); + /** + * @todo only SolrField.SolrType.STRING? that's not + * right... 
knit the SolrField object more into the + * highlighting stuff + */ + SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); + Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); + highlights.add(highlight); + highlightsMap.put(solrField, highlight); + highlightsMap2.put(solrField, highlightSnippets); + highlightsMap3.put(field, highlight); + } } + } + solrSearchResult.setHighlightsAsList(highlights); + solrSearchResult.setHighlightsMap(highlightsMap); + solrSearchResult.setHighlightsAsMap(highlightsMap3); } - SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); + + /** * @todo put all this in the constructor? */ @@ -470,9 +563,7 @@ public SolrQueryResponse search( solrSearchResult.setNameSort(nameSort); solrSearchResult.setReleaseOrCreateDate(release_or_create_date); solrSearchResult.setMatchedFields(matchedFields); - solrSearchResult.setHighlightsAsList(highlights); - solrSearchResult.setHighlightsMap(highlightsMap); - solrSearchResult.setHighlightsAsMap(highlightsMap3); + Map parent = new HashMap<>(); String description = (String) solrDocument.getFieldValue(SearchFields.DESCRIPTION); solrSearchResult.setDescriptionNoSnippet(description); @@ -622,10 +713,12 @@ public SolrQueryResponse search( boolean unpublishedAvailable = false; boolean deaccessionedAvailable = false; boolean hideMetadataSourceFacet = true; + boolean hideLicenseFacet = true; for (FacetField facetField : queryResponse.getFacetFields()) { FacetCategory facetCategory = new FacetCategory(); List facetLabelList = new ArrayList<>(); int numMetadataSources = 0; + int numLicenses = 0; String metadataBlockName = ""; String datasetFieldName = ""; /** @@ -651,23 +744,29 @@ public SolrQueryResponse search( // logger.info("field: " + facetField.getName() + " " + facetFieldCount.getName() + " (" + facetFieldCount.getCount() + ")"); String localefriendlyName = null; if (facetFieldCount.getCount() > 0) { - if(metadataBlockName.length() > 0 ) { - localefriendlyName = getLocaleTitle(datasetFieldName,facetFieldCount.getName(), metadataBlockName); + if(metadataBlockName.length() > 0 ) { + localefriendlyName = getLocaleTitle(datasetFieldName,facetFieldCount.getName(), metadataBlockName); } else if (facetField.getName().equals(SearchFields.METADATA_TYPES)) { - Optional metadataBlockFacet = metadataBlockFacets.stream().filter(blockFacet -> blockFacet.getMetadataBlock().getName().equals(facetFieldCount.getName())).findFirst(); - if (metadataBlockFacet.isEmpty()) { + Optional metadataBlockFacet = metadataBlockFacets.stream().filter(blockFacet -> blockFacet.getMetadataBlock().getName().equals(facetFieldCount.getName())).findFirst(); + if (metadataBlockFacet.isEmpty()) { // metadata block facet is not configured to be displayed => ignore continue; - } + } - localefriendlyName = metadataBlockFacet.get().getMetadataBlock().getLocaleDisplayFacet(); - } else { - try { + localefriendlyName = metadataBlockFacet.get().getMetadataBlock().getLocaleDisplayFacet(); + } else if (facetField.getName().equals(SearchFields.DATASET_LICENSE)) { + try { + localefriendlyName = BundleUtil.getStringFromPropertyFile("license." 
+ facetFieldCount.getName().toLowerCase().replace(" ","_") + ".name", "License"); + } catch (Exception e) { + localefriendlyName = facetFieldCount.getName(); + } + } else { + try { localefriendlyName = BundleUtil.getStringFromPropertyFile(facetFieldCount.getName(), "Bundle"); - } catch (Exception e) { + } catch (Exception e) { localefriendlyName = facetFieldCount.getName(); - } - } + } + } FacetLabel facetLabel = new FacetLabel(localefriendlyName, facetFieldCount.getCount()); // quote field facets facetLabel.setFilterQuery(facetField.getName() + ":\"" + facetFieldCount.getName() + "\""); @@ -680,15 +779,19 @@ public SolrQueryResponse search( } else if (facetFieldCount.getName().equals(IndexServiceBean.getDEACCESSIONED_STRING())) { deaccessionedAvailable = true; } - } - if (facetField.getName().equals(SearchFields.METADATA_SOURCE)) { + } else if (facetField.getName().equals(SearchFields.METADATA_SOURCE)) { numMetadataSources++; + } else if (facetField.getName().equals(SearchFields.DATASET_LICENSE)) { + numLicenses++; } } } if (numMetadataSources > 1) { hideMetadataSourceFacet = false; } + if (numLicenses > 1) { + hideLicenseFacet = false; + } facetCategory.setName(facetField.getName()); // hopefully people will never see the raw facetField.getName() because it may well have an _s at the end facetCategory.setFriendlyName(facetField.getName()); @@ -765,6 +868,10 @@ public SolrQueryResponse search( if (!hideMetadataSourceFacet) { facetCategoryList.add(facetCategory); } + } else if (facetCategory.getName().equals(SearchFields.DATASET_LICENSE)) { + if (!hideLicenseFacet) { + facetCategoryList.add(facetCategory); + } } else { facetCategoryList.add(facetCategory); } @@ -863,7 +970,7 @@ public String getCapitalizedName(String name) { * * @return */ - private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe) { + private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe, boolean addFacets) { User user = dataverseRequest.getUser(); if (user == null) { @@ -922,9 +1029,11 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ AuthenticatedUser au = (AuthenticatedUser) user; - // Logged in user, has publication status facet - // - solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); + if (addFacets) { + // Logged in user, has publication status facet + // + solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); + } // ---------------------------------------------------- // (3) Is this a Super User? 
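(Side note, not part of the diff: the SearchServiceBean and SearchIncludeFragment hunks above hinge on two ideas that are easier to see in isolation. First, the type facet is always requested, while every other facet and all highlighting become optional via the new addFacets/addHighlights flags, so the second-pass query that only needs type counts stays cheap. Second, a 503 returned by a Solr circuit breaker is surfaced as a transient "temporarily unavailable" state rather than "Solr is down". The standalone SolrJ sketch below is only an illustration of that shape; the Solr URL and all field names except "license" are hypothetical placeholders, not values taken from this PR.)

import java.io.IOException;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.BaseHttpSolrClient.RemoteSolrException;
import org.apache.solr.client.solrj.impl.Http2SolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class LeanSolrQuerySketch {

    // Builds either a full query (facets + highlights) or the stripped-down
    // variant used for a "type counts only" second pass.
    static SolrQuery buildQuery(String q, boolean addFacets, boolean addHighlights) {
        SolrQuery solrQuery = new SolrQuery(q);
        solrQuery.setParam("facet", "true");
        solrQuery.addFacetField("typeFacetField");              // the type facet is always requested
        if (addFacets) {
            solrQuery.addFacetField("publicationYearFacet", "license"); // full facet set (abridged)
        }
        if (addHighlights) {
            solrQuery.setHighlight(true).setHighlightSnippets(1);
            solrQuery.addHighlightField("descriptionField");
        }
        return solrQuery;
    }

    public static void main(String[] args) throws SolrServerException, IOException {
        try (SolrClient solr = new Http2SolrClient.Builder("http://localhost:8983/solr/collection1").build()) {
            try {
                // Second-pass style query: no extra facets, no highlights, counts only.
                QueryResponse rsp = solr.query(buildQuery("*", false, false));
                System.out.println("type counts: " + rsp.getFacetField("typeFacetField").getValues());
            } catch (RemoteSolrException ex) {
                if (ex.code() == 503) {
                    // Circuit breaker tripped: treat Solr as temporarily unavailable,
                    // not as down, and skip optional work instead of retrying hard.
                    System.out.println("Solr temporarily unavailable; skipping optional queries.");
                } else {
                    throw ex;
                }
            }
        }
    }
}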
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java index 893099ff08d..27e79cb1fc2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java @@ -26,6 +26,7 @@ public class SolrQueryResponse { private String error; private Map dvObjectCounts = new HashMap<>(); private Map publicationStatusCounts = new HashMap<>(); + private boolean solrTemporarilyUnavailable = false; public static String DATAVERSES_COUNT_KEY = "dataverses_count"; public static String DATASETS_COUNT_KEY = "datasets_count"; @@ -91,7 +92,14 @@ public JsonObjectBuilder getPublicationStatusCountsAsJSON(){ } return this.getMapCountsAsJSON(publicationStatusCounts); } - + + public boolean isSolrTemporarilyUnavailable() { + return solrTemporarilyUnavailable; + } + + public void setSolrTemporarilyUnavailable(boolean solrTemporarilyUnavailable) { + this.solrTemporarilyUnavailable = solrTemporarilyUnavailable; + } public JsonObjectBuilder getDvObjectCountsAsJSON(){ diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java b/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java new file mode 100644 index 00000000000..7ca5fe04bcd --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java @@ -0,0 +1,95 @@ +package edu.harvard.iq.dataverse.settings; + +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.util.FileUtil; + +import jakarta.annotation.PostConstruct; +import jakarta.ejb.DependsOn; +import jakarta.ejb.Singleton; +import jakarta.ejb.Startup; +import java.io.IOException; +import java.nio.file.FileSystemException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +@Startup +@Singleton +@DependsOn({"StartupFlywayMigrator", "PidProviderFactoryBean"}) +public class ConfigCheckService { + + + private static final Logger logger = Logger.getLogger(ConfigCheckService.class.getCanonicalName()); + + public static class ConfigurationError extends RuntimeException { + public ConfigurationError(String message) { + super(message); + } + } + + @PostConstruct + public void startup() { + if (!checkSystemDirectories() || !checkPidProviders()) { + throw new ConfigurationError("Not all configuration checks passed successfully. See logs above."); + } + } + + + + /** + * In this method, we check the existence and write-ability of all important directories we use during + * normal operations. It does not include checks for the storage system. If directories are not available, + * try to create them (and fail when not allowed to). + * + * @return True if all checks successful, false otherwise. + */ + public boolean checkSystemDirectories() { + Map paths = Map.of( + Path.of(JvmSettings.UPLOADS_DIRECTORY.lookup()), "temporary JSF upload space (see " + JvmSettings.UPLOADS_DIRECTORY.getScopedKey() + ")", + Path.of(FileUtil.getFilesTempDirectory()), "temporary processing space (see " + JvmSettings.FILES_DIRECTORY.getScopedKey() + ")", + Path.of(JvmSettings.DOCROOT_DIRECTORY.lookup()), "docroot space (see " + JvmSettings.DOCROOT_DIRECTORY.getScopedKey() + ")"); + + boolean success = true; + for (Path path : paths.keySet()) { + // Check if the configured path is absolute - avoid potential problems with relative paths this way + if (! 
path.isAbsolute()) { + logger.log(Level.SEVERE, () -> "Configured directory " + path + " for " + paths.get(path) + " is not absolute"); + success = false; + continue; + } + + if (! Files.exists(path)) { + try { + Files.createDirectories(path); + } catch (IOException e) { + String details; + if (e instanceof FileSystemException) { + details = ": " + e.getClass(); + } else { + details = ""; + } + + logger.log(Level.SEVERE, () -> "Could not create directory " + path + " for " + paths.get(path) + details); + success = false; + } + } else if (!Files.isWritable(path)) { + logger.log(Level.SEVERE, () -> "Directory " + path + " for " + paths.get(path) + " exists, but is not writeable"); + success = false; + } + } + return success; + } + + /** + * Verifies that at least one PidProvider capable of editing/minting PIDs is + * configured. Requires the @DependsOn("PidProviderFactoryBean") annotation above + * since it is the @PostCOnstruct init() method of that class that loads the PidProviders + * + * @return True if all checks successful, false otherwise. + */ + private boolean checkPidProviders() { + return PidUtil.getManagedProviderIds().size() > 0; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 738d63e924f..b92618dab89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -2,8 +2,6 @@ import org.eclipse.microprofile.config.ConfigProvider; -import edu.harvard.iq.dataverse.util.StringUtil; - import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -49,6 +47,10 @@ public enum JvmSettings { // FILES SETTINGS SCOPE_FILES(PREFIX, "files"), FILES_DIRECTORY(SCOPE_FILES, "directory"), + UPLOADS_DIRECTORY(SCOPE_FILES, "uploads"), + DOCROOT_DIRECTORY(SCOPE_FILES, "docroot"), + GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), + GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-cache-maxage"), // SOLR INDEX SETTINGS SCOPE_SOLR(PREFIX, "solr"), @@ -89,40 +91,122 @@ public enum JvmSettings { // PERSISTENT IDENTIFIER SETTINGS SCOPE_PID(PREFIX, "pid"), - - // PROVIDER EZID (legacy) - these settings were formerly kept together with DataCite ones - SCOPE_PID_EZID(SCOPE_PID, "ezid"), - EZID_API_URL(SCOPE_PID_EZID, "api-url", "doi.baseurlstring"), - EZID_USERNAME(SCOPE_PID_EZID, "username", "doi.username"), - EZID_PASSWORD(SCOPE_PID_EZID, "password", "doi.password"), + PID_PROVIDERS(SCOPE_PID, "providers"), + PID_DEFAULT_PROVIDER(SCOPE_PID, "default-provider"), + SCOPE_PID_PROVIDER(SCOPE_PID), + PID_PROVIDER_TYPE(SCOPE_PID_PROVIDER, "type"), + PID_PROVIDER_LABEL(SCOPE_PID_PROVIDER, "label"), + PID_PROVIDER_AUTHORITY(SCOPE_PID_PROVIDER, "authority"), + PID_PROVIDER_SHOULDER(SCOPE_PID_PROVIDER, "shoulder"), + PID_PROVIDER_IDENTIFIER_GENERATION_STYLE(SCOPE_PID_PROVIDER, "identifier-generation-style"), + PID_PROVIDER_DATAFILE_PID_FORMAT(SCOPE_PID_PROVIDER, "datafile-pid-format"), + PID_PROVIDER_MANAGED_LIST(SCOPE_PID_PROVIDER, "managed-list"), + PID_PROVIDER_EXCLUDED_LIST(SCOPE_PID_PROVIDER, "excluded-list"), + + + // PROVIDER EZID - these settings were formerly kept together with DataCite ones + SCOPE_PID_EZID(SCOPE_PID_PROVIDER, "ezid"), + EZID_API_URL(SCOPE_PID_EZID, "api-url"), + EZID_USERNAME(SCOPE_PID_EZID, "username"), + EZID_PASSWORD(SCOPE_PID_EZID, "password"), // PROVIDER DATACITE - SCOPE_PID_DATACITE(SCOPE_PID, "datacite"), - DATACITE_MDS_API_URL(SCOPE_PID_DATACITE, 
"mds-api-url", "doi.baseurlstring"), - DATACITE_REST_API_URL(SCOPE_PID_DATACITE, "rest-api-url", "doi.dataciterestapiurlstring", "doi.mdcbaseurlstring"), - DATACITE_USERNAME(SCOPE_PID_DATACITE, "username", "doi.username"), - DATACITE_PASSWORD(SCOPE_PID_DATACITE, "password", "doi.password"), + SCOPE_PID_DATACITE(SCOPE_PID_PROVIDER, "datacite"), + DATACITE_MDS_API_URL(SCOPE_PID_DATACITE, "mds-api-url"), + DATACITE_REST_API_URL(SCOPE_PID_DATACITE, "rest-api-url"), + DATACITE_USERNAME(SCOPE_PID_DATACITE, "username"), + DATACITE_PASSWORD(SCOPE_PID_DATACITE, "password"), // PROVIDER PERMALINK - SCOPE_PID_PERMALINK(SCOPE_PID, "permalink"), - PERMALINK_BASEURL(SCOPE_PID_PERMALINK, "base-url", "perma.baseurlstring"), + SCOPE_PID_PERMALINK(SCOPE_PID_PROVIDER, "permalink"), + PERMALINK_BASE_URL(SCOPE_PID_PERMALINK, "base-url"), + PERMALINK_SEPARATOR(SCOPE_PID_PERMALINK, "separator"), // PROVIDER HANDLE - SCOPE_PID_HANDLENET(SCOPE_PID, "handlenet"), - HANDLENET_INDEX(SCOPE_PID_HANDLENET, "index", "dataverse.handlenet.index"), + SCOPE_PID_HANDLENET(SCOPE_PID_PROVIDER, "handlenet"), + HANDLENET_INDEX(SCOPE_PID_HANDLENET, "index"), + HANDLENET_INDEPENDENT_SERVICE(SCOPE_PID_HANDLENET, "independent-service"), + HANDLENET_AUTH_HANDLE(SCOPE_PID_HANDLENET, "auth-handle"), SCOPE_PID_HANDLENET_KEY(SCOPE_PID_HANDLENET, "key"), - HANDLENET_KEY_PATH(SCOPE_PID_HANDLENET_KEY, "path", "dataverse.handlenet.admcredfile"), - HANDLENET_KEY_PASSPHRASE(SCOPE_PID_HANDLENET_KEY, "passphrase", "dataverse.handlenet.admprivphrase"), + HANDLENET_KEY_PATH(SCOPE_PID_HANDLENET_KEY, "path"), + HANDLENET_KEY_PASSPHRASE(SCOPE_PID_HANDLENET_KEY, "passphrase"), + + /* + * The deprecated legacy settings below are from when you could only have a + * single PIDProvider. They mirror the settings above, but are global,not within + * the SCOPE_PID_PROVIDER of an individual provider. 
+ */ + /** + * DEPRECATED PROVIDER DATACITE + * + * @deprecated - legacy single provider setting providing backward compatibility + */ + @Deprecated(forRemoval = true, since = "2024-02-13") + SCOPE_LEGACY_PID_DATACITE(SCOPE_PID, "datacite"), + LEGACY_DATACITE_MDS_API_URL(SCOPE_LEGACY_PID_DATACITE, "mds-api-url", "doi.baseurlstring"), + LEGACY_DATACITE_REST_API_URL(SCOPE_LEGACY_PID_DATACITE, "rest-api-url", "doi.dataciterestapiurlstring", + "doi.mdcbaseurlstring"), + LEGACY_DATACITE_USERNAME(SCOPE_LEGACY_PID_DATACITE, "username", "doi.username"), + LEGACY_DATACITE_PASSWORD(SCOPE_LEGACY_PID_DATACITE, "password", "doi.password"), + + /** + * DEPRECATED PROVIDER EZID + * + * @deprecated - legacy single provider setting providing backward compatibility + */ + @Deprecated(forRemoval = true, since = "2024-02-13") + SCOPE_LEGACY_PID_EZID(SCOPE_PID, "ezid"), LEGACY_EZID_API_URL(SCOPE_LEGACY_PID_EZID, "api-url"), + LEGACY_EZID_USERNAME(SCOPE_LEGACY_PID_EZID, "username"), LEGACY_EZID_PASSWORD(SCOPE_LEGACY_PID_EZID, "password"), + + /** + * DEPRECATED PROVIDER PERMALINK + * + * @deprecated - legacy single provider setting providing backward compatibility + */ + @Deprecated(forRemoval = true, since = "2024-02-13") + SCOPE_LEGACY_PID_PERMALINK(SCOPE_PID, "permalink"), + LEGACY_PERMALINK_BASEURL(SCOPE_LEGACY_PID_PERMALINK, "base-url", "perma.baseurlstring"), + + /** + * DEPRECATED PROVIDER HANDLE + * + * @deprecated - legacy single provider setting providing backward compatibility + */ + @Deprecated(forRemoval = true, since = "2024-02-13") + SCOPE_LEGACY_PID_HANDLENET(SCOPE_PID, "handlenet"), + LEGACY_HANDLENET_INDEX(SCOPE_LEGACY_PID_HANDLENET, "index", "dataverse.handlenet.index"), + @Deprecated(forRemoval = true, since = "2024-02-13") + SCOPE_LEGACY_PID_HANDLENET_KEY(SCOPE_LEGACY_PID_HANDLENET, "key"), + LEGACY_HANDLENET_KEY_PATH(SCOPE_LEGACY_PID_HANDLENET_KEY, "path", "dataverse.handlenet.admcredfile"), + LEGACY_HANDLENET_KEY_PASSPHRASE(SCOPE_LEGACY_PID_HANDLENET_KEY, "passphrase", "dataverse.handlenet.admprivphrase"), // SPI SETTINGS SCOPE_SPI(PREFIX, "spi"), SCOPE_EXPORTERS(SCOPE_SPI, "exporters"), EXPORTERS_DIRECTORY(SCOPE_EXPORTERS, "directory"), + SCOPE_PIDPROVIDERS(SCOPE_SPI, "pidproviders"), + PIDPROVIDERS_DIRECTORY(SCOPE_PIDPROVIDERS, "directory"), // MAIL SETTINGS SCOPE_MAIL(PREFIX, "mail"), SUPPORT_EMAIL(SCOPE_MAIL, "support-email"), CC_SUPPORT_ON_CONTACT_EMAIL(SCOPE_MAIL, "cc-support-on-contact-email"), + + // AUTH SETTINGS + SCOPE_AUTH(PREFIX, "auth"), + // AUTH: OIDC SETTINGS + SCOPE_OIDC(SCOPE_AUTH, "oidc"), + OIDC_ENABLED(SCOPE_OIDC, "enabled"), + OIDC_TITLE(SCOPE_OIDC, "title"), + OIDC_SUBTITLE(SCOPE_OIDC, "subtitle"), + OIDC_AUTH_SERVER_URL(SCOPE_OIDC, "auth-server-url"), + OIDC_CLIENT_ID(SCOPE_OIDC, "client-id"), + OIDC_CLIENT_SECRET(SCOPE_OIDC, "client-secret"), + SCOPE_OIDC_PKCE(SCOPE_OIDC, "pkce"), + OIDC_PKCE_ENABLED(SCOPE_OIDC_PKCE, "enabled"), + OIDC_PKCE_METHOD(SCOPE_OIDC_PKCE, "method"), + OIDC_PKCE_CACHE_MAXSIZE(SCOPE_OIDC_PKCE, "max-cache-size"), + OIDC_PKCE_CACHE_MAXAGE(SCOPE_OIDC_PKCE, "max-cache-age"), // UI SETTINGS SCOPE_UI(PREFIX, "ui"), @@ -132,6 +216,17 @@ public enum JvmSettings { // NetCDF SETTINGS SCOPE_NETCDF(PREFIX, "netcdf"), GEO_EXTRACT_S3_DIRECT_UPLOAD(SCOPE_NETCDF, "geo-extract-s3-direct-upload"), + + // BAGIT SETTINGS + SCOPE_BAGIT(PREFIX, "bagit"), + SCOPE_BAGIT_SOURCEORG(SCOPE_BAGIT, "sourceorg"), + BAGIT_SOURCE_ORG_NAME(SCOPE_BAGIT_SOURCEORG, "name"), + BAGIT_SOURCEORG_ADDRESS(SCOPE_BAGIT_SOURCEORG, "address"), + 
BAGIT_SOURCEORG_EMAIL(SCOPE_BAGIT_SOURCEORG, "email"), + + // STORAGE USE SETTINGS + SCOPE_STORAGEUSE(PREFIX, "storageuse"), + STORAGEUSE_DISABLE_UPDATES(SCOPE_STORAGEUSE, "disable-storageuse-increments"), ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 2826df74ed1..b05c88c0be2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -4,14 +4,12 @@ import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; import edu.harvard.iq.dataverse.api.ApiBlockingFilter; import edu.harvard.iq.dataverse.util.StringUtil; - +import edu.harvard.iq.dataverse.util.json.JsonUtil; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; import jakarta.inject.Named; -import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonObject; -import jakarta.json.JsonReader; import jakarta.json.JsonValue; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; @@ -20,7 +18,6 @@ import org.json.JSONException; import org.json.JSONObject; -import java.io.StringReader; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -90,6 +87,15 @@ public enum Key { * StorageSite database table. */ LocalDataAccessPath, + /** + * The algorithm used to generate PIDs, randomString (default) or + * storedProcedure + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") IdentifierGenerationStyle, OAuth2CallbackUrl, DefaultAuthProvider, @@ -192,19 +198,42 @@ public enum Key { SignUpUrl, /** Key for whether we allow users to sign up */ AllowSignUp, - /** protocol for global id */ + /** + * protocol for global id + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") Protocol, - /** authority for global id */ + /** + * authority for global id + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") Authority, - /** DoiProvider for global id */ + /** + * DoiProvider for global id + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") DoiProvider, - /** Shoulder for global id - used to create a common prefix on identifiers */ + /** + * Shoulder for global id - used to create a common prefix on identifiers + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") Shoulder, - /* Removed for now - tried to add here but DOI Service Bean didn't like it at start-up - DoiUsername, - DoiPassword, - DoiBaseurlstring, - */ /** Optionally override http://guides.dataverse.org . */ GuidesBaseUrl, @@ -350,10 +379,16 @@ Whether Harvesting (OAI) service is enabled */ PVCustomPasswordResetAlertMessage, /* - String to describe DOI format for data files. 
Default is DEPENDENT. - 'DEPENEDENT' means the DOI will be the Dataset DOI plus a file DOI with a slash in between. - 'INDEPENDENT' means a new global id, completely independent from the dataset-level global id. - */ + * String to describe DOI format for data files. Default is DEPENDENT. + * 'DEPENDENT' means the DOI will be the Dataset DOI plus a file DOI with a + * slash in between. 'INDEPENDENT' means a new global id, completely independent + * from the dataset-level global id. + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") DataFilePIDFormat, /* Json array of supported languages */ @@ -361,7 +396,7 @@ Whether Harvesting (OAI) service is enabled /* Number for the minimum number of files to send PID registration to asynchronous workflow */ - PIDAsynchRegFileCount, + //PIDAsynchRegFileCount, /** * */ @@ -369,12 +404,22 @@ Whether Harvesting (OAI) service is enabled /** * Indicates if the Handle service is setup to work 'independently' (No communication with the Global Handle Registry) + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * */ + @Deprecated(forRemoval = true, since = "2024-02-13") IndependentHandleService, /** Handle to use for authentication if the default is not being used - */ + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") HandleAuthHandle, /** @@ -464,18 +509,6 @@ Whether Harvesting (OAI) service is enabled */ ExportInstallationAsDistributorOnlyWhenNotSet, - /** - * Basic Globus Token for Globus Application - */ - GlobusBasicToken, - /** - * GlobusEndpoint is Globus endpoint for Globus application - */ - GlobusEndpoint, - /** - * Comma separated list of Globus enabled stores - */ - GlobusStores, /** Globus App URL * */ @@ -578,7 +611,16 @@ Whether Harvesting (OAI) service is enabled /** * The URL for the DvWebLoader tool (see github.com/gdcc/dvwebloader for details) */ - WebloaderUrl, + WebloaderUrl, + /** + * Enforce storage quotas: + */ + UseStorageQuotas, + /** + * Placeholder storage quota (defines the same quota setting for every user; used to test the concept of a quota. + */ + StorageQuotaSizeInBytes, + /** * A comma-separated list of CategoryName in the desired order for files to be * sorted in the file table display. If not set, files will be sorted @@ -599,7 +641,17 @@ Whether Harvesting (OAI) service is enabled /* * True/false(default) option deciding whether file PIDs can be enabled per collection - using the Dataverse/collection set attribute API call. */ - AllowEnablingFilePIDsPerCollection + AllowEnablingFilePIDsPerCollection, + /** + * Allows an instance admin to disable Solr search facets on the collection + * and dataset pages instantly + */ + DisableSolrFacets, + /** + * When ingesting tabular data files, store the generated tab-delimited + * files *with* the variable names line up top. 
+ */ + StoreIngestedTabularFilesWithVarHeaders ; @Override @@ -687,8 +739,8 @@ public Long getValueForCompoundKeyAsLong(Key key, String param){ try { return Long.parseLong(val); } catch (NumberFormatException ex) { - try ( StringReader rdr = new StringReader(val) ) { - JsonObject settings = Json.createReader(rdr).readObject(); + try { + JsonObject settings = JsonUtil.getJsonObject(val); if(settings.containsKey(param)) { return Long.parseLong(settings.getString(param)); } else if(settings.containsKey("default")) { @@ -721,8 +773,8 @@ public Boolean getValueForCompoundKeyAsBoolean(Key key, String param) { return null; } - try (StringReader rdr = new StringReader(val)) { - JsonObject settings = Json.createReader(rdr).readObject(); + try { + JsonObject settings = JsonUtil.getJsonObject(val); if (settings.containsKey(param)) { return Boolean.parseBoolean(settings.getString(param)); } else if (settings.containsKey("default")) { @@ -888,8 +940,7 @@ public Map getBaseMetadataLanguageMap(Map languag if(mlString.isEmpty()) { mlString="[]"; } - JsonReader jsonReader = Json.createReader(new StringReader(mlString)); - JsonArray languages = jsonReader.readArray(); + JsonArray languages = JsonUtil.getJsonArray(mlString); for(JsonValue jv: languages) { JsonObject lang = (JsonObject) jv; languageMap.put(lang.getString("locale"), lang.getString("title")); diff --git a/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java b/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java index e32b811ee2c..86ae697f771 100644 --- a/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java @@ -3,6 +3,8 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.settings.ConfigCheckService; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.xml.XmlValidator; import java.io.File; @@ -210,16 +212,17 @@ public static boolean stageFileExists() { } return false; } - + + /** + * Lookup the location where to generate the sitemap. + * + * Note: the location is checked to be configured, does exist and is writeable in + * {@link ConfigCheckService#checkSystemDirectories()} + * + * @return Sitemap storage location ([docroot]/sitemap) + */ private static String getSitemapPathString() { - String sitemapPathString = "/tmp"; - // i.e. /usr/local/glassfish4/glassfish/domains/domain1 - String domainRoot = System.getProperty("com.sun.aas.instanceRoot"); - if (domainRoot != null) { - // Note that we write to a directory called "sitemap" but we serve just "/sitemap.xml" using PrettyFaces. 
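The replacement getSitemapPathString() just below drops the instanceRoot fallback and builds the path from JvmSettings.DOCROOT_DIRECTORY, which the new javadoc says is validated by ConfigCheckService#checkSystemDirectories(). That validation is not shown in this changeset; the following is only a rough sketch of the kind of check being described, assuming it creates the directory when missing and verifies writability (the class and method names are invented):

    import edu.harvard.iq.dataverse.settings.JvmSettings;
    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;

    class DocrootCheckSketch {
        static void checkDocroot() throws IOException {
            Path docroot = Path.of(JvmSettings.DOCROOT_DIRECTORY.lookup());
            Files.createDirectories(docroot);               // no-op when it already exists
            if (!Files.isWritable(docroot)) {
                throw new IOException("Configured docroot is not writable: " + docroot);
            }
        }
    }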
- sitemapPathString = domainRoot + File.separator + "docroot" + File.separator + "sitemap"; - } - return sitemapPathString; + return JvmSettings.DOCROOT_DIRECTORY.lookup() + File.separator + "sitemap"; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java new file mode 100644 index 00000000000..d00f7041e61 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java @@ -0,0 +1,116 @@ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DvObject; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.OneToOne; +import java.io.Serializable; +import java.util.logging.Logger; + +//import jakarta.persistence.*; + +/** + * + * @author landreev + * + */ +@Entity +public class StorageQuota implements Serializable { + private static final Logger logger = Logger.getLogger(StorageQuota.class.getCanonicalName()); + + /** + * Only Collection quotas are supported, for now + */ + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + /** + * For defining quotas for Users and/or Groups + * (Not supported as of yet) + + @Column(nullable = true) + private String assigneeIdentifier; + */ + + /** + * Could be changed to ManyToOne - if we wanted to be able to define separate + * quotas on the same collection for different users. (?) + * Whether we actually want to support the above is TBD. (possibly not) + * Only collection-wide quotas are supported for now. + */ + @OneToOne + @JoinColumn(name="definitionPoint_id", nullable=true) + private DvObject definitionPoint; + + @Column(nullable = true) + private Long allocation; + + public StorageQuota() {} + + /** + * Could be uncommented if/when we want to add per-user quotas (see above) + public String getAssigneeIdentifier() { + return assigneeIdentifier; + } + + public void setAssigneeIdentifier(String assigneeIdentifier) { + this.assigneeIdentifier = assigneeIdentifier; + }*/ + + public DvObject getDefinitionPoint() { + return definitionPoint; + } + + public void setDefinitionPoint(DvObject definitionPoint) { + this.definitionPoint = definitionPoint; + } + + public Long getAllocation() { + return allocation; + } + + public void setAllocation(Long allocation) { + this.allocation = allocation; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? 
id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof StorageQuota)) { + return false; + } + StorageQuota other = (StorageQuota) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.storageuse.StorageQuota[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java new file mode 100644 index 00000000000..b777736dc8d --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java @@ -0,0 +1,100 @@ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.DvObjectContainer; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GenerationType; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.OneToOne; +import jakarta.persistence.Table; +import java.io.Serializable; + +/** + * + * @author landreev + */ +@NamedQueries({ + @NamedQuery(name = "StorageUse.findByteSizeByDvContainerId",query = "SELECT su.sizeInBytes FROM StorageUse su WHERE su.dvObjectContainer.id =:dvObjectId "), + @NamedQuery(name = "StorageUse.findByDvContainerId",query = "SELECT su FROM StorageUse su WHERE su.dvObjectContainer.id =:dvObjectId "), + @NamedQuery(name = "StorageUse.incrementByteSizeByDvContainerId", query = "UPDATE StorageUse su SET su.sizeInBytes = su.sizeInBytes +:fileSize WHERE su.dvObjectContainer.id =:dvObjectId") +}) +@Entity +@Table(indexes = {@Index(columnList="dvobjectcontainer_id")}) +public class StorageUse implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + @OneToOne + @JoinColumn(nullable=false) + private DvObject dvObjectContainer; + + @Column + private Long sizeInBytes = null; + + public StorageUse() {} + + public StorageUse(DvObjectContainer dvObjectContainer) { + this(dvObjectContainer, 0L); + } + + public StorageUse(DvObjectContainer dvObjectContainer, Long sizeInBytes) { + this.dvObjectContainer = dvObjectContainer; + this.sizeInBytes = sizeInBytes; + } + + public Long getSizeInBytes() { + return sizeInBytes; + } + + public void setSizeInBytes(Long sizeInBytes) { + this.sizeInBytes = sizeInBytes; + } + + public void incrementSizeInBytes(Long sizeInBytes) { + this.sizeInBytes += sizeInBytes; + } + + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? 
id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof StorageUse)) { + return false; + } + StorageUse other = (StorageUse) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.storageuse.StorageUse[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java new file mode 100644 index 00000000000..7aea7a7b596 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -0,0 +1,72 @@ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.settings.JvmSettings; +import jakarta.ejb.Stateless; +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import java.util.Optional; +import java.util.logging.Logger; + +/** + * + * @author landreev + */ +@Stateless +@Named +public class StorageUseServiceBean implements java.io.Serializable { + private static final Logger logger = Logger.getLogger(StorageUseServiceBean.class.getCanonicalName()); + + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; + + public StorageUse findByDvContainerId(Long dvObjectId) { + return em.createNamedQuery("StorageUse.findByDvContainerId", StorageUse.class).setParameter("dvObjectId", dvObjectId).getSingleResult(); + } + + /** + * Looks up the current storage use size, using a named query in a new + * transaction + * @param dvObjectId + * @return + */ + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public Long findStorageSizeByDvContainerId(Long dvObjectId) { + Long res = em.createNamedQuery("StorageUse.findByteSizeByDvContainerId", Long.class).setParameter("dvObjectId", dvObjectId).getSingleResult(); + return res == null ? 0L : res; + } + + /** + * Increments the recorded storage size for all the dvobject parents of a + * datafile, recursively. + * @param dvObjectContainerId database id of the immediate parent (dataset) + * @param increment size in bytes of the file(s) being added + */ + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public void incrementStorageSizeRecursively(Long dvObjectContainerId, Long increment) { + if (dvObjectContainerId != null && increment != null) { + Optional allow = JvmSettings.STORAGEUSE_DISABLE_UPDATES.lookupOptional(Boolean.class); + if (!(allow.isPresent() && allow.get())) { + String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" + + "(" + + " SELECT id, owner_id\n" + + " FROM dvobject\n" + + " WHERE id=" + dvObjectContainerId + "\n" + + " UNION ALL\n" + + " SELECT dvobject.id, dvobject.owner_id\n" + + " FROM dvobject\n" + + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" + + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" + + "FROM uptree\n" + + "WHERE dvobjectcontainer_id = uptree.id;"; + + int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + } + } + // @todo throw an exception if the number of parent dvobjects updated by + // the query is < 2 - ? 
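incrementStorageSizeRecursively() above walks the dvobject ownership tree with a recursive CTE and is guarded by the STORAGEUSE_DISABLE_UPDATES kill switch via lookupOptional(Boolean.class). A hedged sketch of how an upload path might combine it with the new UseStorageQuotas and StorageQuotaSizeInBytes database keys; the bean below and its wiring are invented for illustration and not taken from this changeset:

    import edu.harvard.iq.dataverse.Dataset;
    import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
    import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean;
    import jakarta.ejb.EJB;
    import jakarta.ejb.Stateless;

    @Stateless
    public class UploadAccountingSketch {
        @EJB StorageUseServiceBean storageUseService;
        @EJB SettingsServiceBean settingsService;

        public void recordUpload(Dataset dataset, long uploadedBytes) {
            // Optional quota gate, using the two new database keys added in this changeset:
            if (settingsService.isTrueForKey(SettingsServiceBean.Key.UseStorageQuotas, false)) {
                Long used = storageUseService.findStorageSizeByDvContainerId(dataset.getId());
                // ... compare used + uploadedBytes against Key.StorageQuotaSizeInBytes here ...
            }
            // Count the new bytes on the dataset and every collection above it;
            // the service method runs in its own transaction and is a no-op when
            // storage-use updates are disabled via the JVM setting.
            storageUseService.incrementStorageSizeRecursively(dataset.getId(), uploadedBytes);
        }
    }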
+ } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java new file mode 100644 index 00000000000..f7dac52e886 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java @@ -0,0 +1,38 @@ +package edu.harvard.iq.dataverse.storageuse; + +/** + * + * @author landreev + */ +public class UploadSessionQuotaLimit { + private Long totalAllocatedInBytes = 0L; + private Long totalUsageInBytes = 0L; + + public UploadSessionQuotaLimit(Long allocated, Long used) { + this.totalAllocatedInBytes = allocated; + this.totalUsageInBytes = used; + } + + public Long getTotalAllocatedInBytes() { + return totalAllocatedInBytes; + } + + public void setTotalAllocatedInBytes(Long totalAllocatedInBytes) { + this.totalAllocatedInBytes = totalAllocatedInBytes; + } + + public Long getTotalUsageInBytes() { + return totalUsageInBytes; + } + + public void setTotalUsageInBytes(Long totalUsageInBytes) { + this.totalUsageInBytes = totalUsageInBytes; + } + + public Long getRemainingQuotaInBytes() { + if (totalUsageInBytes > totalAllocatedInBytes) { + return 0L; + } + return totalAllocatedInBytes - totalUsageInBytes; + } + } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 3c16321c590..e7d7e2c8eb2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -28,18 +28,22 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Embargo; import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; + +import static edu.harvard.iq.dataverse.api.ApiConstants.DS_VERSION_DRAFT; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.ingest.IngestReport; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.ingest.IngestableDataChecker; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.settings.ConfigCheckService; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; @@ -94,19 +98,14 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; -import org.apache.commons.io.FileUtils; import java.util.zip.GZIPInputStream; -import java.util.zip.ZipFile; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; import org.apache.commons.io.FilenameUtils; import edu.harvard.iq.dataverse.dataaccess.DataAccessOption; import edu.harvard.iq.dataverse.dataaccess.StorageIO; -import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; +import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import java.util.Arrays; -import java.util.Enumeration; import org.apache.commons.io.IOUtils; import 
org.apache.commons.lang3.StringUtils; import ucar.nc2.NetcdfFile; @@ -397,7 +396,7 @@ public static String getUserFriendlyOriginalType(DataFile dataFile) { * Returns a content type string for a FileObject * */ - private static String determineContentType(File fileObject) { + public static String determineContentType(File fileObject) { if (fileObject==null){ return null; } @@ -797,496 +796,6 @@ public static String generateOriginalExtension(String fileType) { } return ""; } - - public static CreateDataFileResult createDataFiles(DatasetVersion version, InputStream inputStream, - String fileName, String suppliedContentType, String newStorageIdentifier, String newCheckSum, - SystemConfig systemConfig) throws IOException { - ChecksumType checkSumType = DataFile.ChecksumType.MD5; - if (newStorageIdentifier == null) { - checkSumType = systemConfig.getFileFixityChecksumAlgorithm(); - } - return createDataFiles(version, inputStream, fileName, suppliedContentType, newStorageIdentifier, newCheckSum, checkSumType, systemConfig); - } - - public static CreateDataFileResult createDataFiles(DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, String newStorageIdentifier, String newCheckSum, ChecksumType newCheckSumType, SystemConfig systemConfig) throws IOException { - List datafiles = new ArrayList<>(); - - //When there is no checksum/checksumtype being sent (normal upload, needs to be calculated), set the type to the current default - if(newCheckSumType == null) { - newCheckSumType = systemConfig.getFileFixityChecksumAlgorithm(); - } - - String warningMessage = null; - - // save the file, in the temporary location for now: - Path tempFile = null; - - Long fileSizeLimit = systemConfig.getMaxFileUploadSizeForStore(version.getDataset().getEffectiveStorageDriverId()); - String finalType = null; - if (newStorageIdentifier == null) { - if (getFilesTempDirectory() != null) { - tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload"); - // "temporary" location is the key here; this is why we are not using - // the DataStore framework for this - the assumption is that - // temp files will always be stored on the local filesystem. - // -- L.A. Jul. 2014 - logger.fine("Will attempt to save the file as: " + tempFile.toString()); - Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING); - - // A file size check, before we do anything else: - // (note that "no size limit set" = "unlimited") - // (also note, that if this is a zip file, we'll be checking - // the size limit for each of the individual unpacked files) - Long fileSize = tempFile.toFile().length(); - if (fileSizeLimit != null && fileSize > fileSizeLimit) { - try { - tempFile.toFile().delete(); - } catch (Exception ex) { - } - throw new IOException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit))); - } - - } else { - throw new IOException("Temp directory is not configured."); - } - logger.fine("mime type supplied: " + suppliedContentType); - // Let's try our own utilities (Jhove, etc.) to determine the file type - // of the uploaded file. (We may already have a mime type supplied for this - // file - maybe the type that the browser recognized on upload; or, if - // it's a harvest, maybe the remote server has already given us the type - // for this file... with our own type utility we may or may not do better - // than the type supplied: - // -- L.A. 
- String recognizedType = null; - - // If we detect the dataverse prefix in the supplied mimetype we do no - // further recognition - if (suppliedContentType.toLowerCase().startsWith(OVERWRITE_MIME_TYPE_PREFIX)) { - finalType = suppliedContentType.toLowerCase().substring(OVERWRITE_MIME_TYPE_PREFIX.length()); - logger.fine("Overwrite prefix detected. Using supplied mime type."); - } - else { - try { - recognizedType = determineFileType(tempFile.toFile(), fileName); - logger.fine("File utility recognized the file as " + recognizedType); - if (recognizedType != null && !recognizedType.equals("")) { - if (useRecognizedType(suppliedContentType, recognizedType)) { - finalType = recognizedType; - } - } - - } catch (Exception ex) { - logger.warning("Failed to run the file utility mime type check on file " + fileName); - } - - if (finalType == null) { - finalType = (suppliedContentType == null || suppliedContentType.equals("")) - ? MIME_TYPE_UNDETERMINED_DEFAULT - : suppliedContentType; - } - } - - // A few special cases: - // if this is a gzipped FITS file, we'll uncompress it, and ingest it as - // a regular FITS file: - if (finalType.equals("application/fits-gzipped")) { - - String finalFileName = fileName; - // if the file name had the ".gz" extension, remove it, - // since we are going to uncompress it: - if (fileName != null && fileName.matches(".*\\.gz$")) { - finalFileName = fileName.replaceAll("\\.gz$", ""); - } - - DataFile datafile = null; - try (InputStream uncompressedIn = new GZIPInputStream(new FileInputStream(tempFile.toFile()))){ - File unZippedTempFile = saveInputStreamInTempFile(uncompressedIn, fileSizeLimit); - datafile = createSingleDataFile(version, unZippedTempFile, finalFileName, MIME_TYPE_UNDETERMINED_DEFAULT, systemConfig.getFileFixityChecksumAlgorithm()); - } catch (IOException | FileExceedsMaxSizeException ioex) { - datafile = null; - } - - // If we were able to produce an uncompressed file, we'll use it - // to create and return a final DataFile; if not, we're not going - // to do anything - and then a new DataFile will be created further - // down, from the original, uncompressed file. - if (datafile != null) { - // remove the compressed temp file: - try { - tempFile.toFile().delete(); - } catch (SecurityException ex) { - // (this is very non-fatal) - logger.warning("Failed to delete temporary file " + tempFile.toString()); - } - - datafiles.add(datafile); - return CreateDataFileResult.success(fileName, finalType, datafiles); - } - - // If it's a ZIP file, we are going to unpack it and create multiple - // DataFile objects from its contents: - } else if (finalType.equals("application/zip")) { - - ZipFile zipFile = null; - ZipInputStream unZippedIn = null; - ZipEntry zipEntry = null; - - int fileNumberLimit = systemConfig.getZipUploadFilesLimit(); - - try { - Charset charset = null; - /* - TODO: (?) - We may want to investigate somehow letting the user specify - the charset for the filenames in the zip file... - - otherwise, ZipInputStream bails out if it encounteres a file - name that's not valid in the current charest (i.e., UTF-8, in - our case). It would be a bit trickier than what we're doing for - SPSS tabular ingests - with the lang. encoding pulldown menu - - because this encoding needs to be specified *before* we upload and - attempt to unzip the file. - -- L.A. 
4.0 beta12 - logger.info("default charset is "+Charset.defaultCharset().name()); - if (Charset.isSupported("US-ASCII")) { - logger.info("charset US-ASCII is supported."); - charset = Charset.forName("US-ASCII"); - if (charset != null) { - logger.info("was able to obtain charset for US-ASCII"); - } - - } - */ - - /** - * Perform a quick check for how many individual files are - * inside this zip archive. If it's above the limit, we can - * give up right away, without doing any unpacking. - * This should be a fairly inexpensive operation, we just need - * to read the directory at the end of the file. - */ - - if (charset != null) { - zipFile = new ZipFile(tempFile.toFile(), charset); - } else { - zipFile = new ZipFile(tempFile.toFile()); - } - /** - * The ZipFile constructors above will throw ZipException - - * a type of IOException - if there's something wrong - * with this file as a zip. There's no need to intercept it - * here, it will be caught further below, with other IOExceptions, - * at which point we'll give up on trying to unpack it and - * then attempt to save it as is. - */ - - int numberOfUnpackableFiles = 0; - /** - * Note that we can't just use zipFile.size(), - * unfortunately, since that's the total number of entries, - * some of which can be directories. So we need to go - * through all the individual zipEntries and count the ones - * that are files. - */ - - for (Enumeration entries = zipFile.entries(); entries.hasMoreElements();) { - ZipEntry entry = entries.nextElement(); - logger.fine("inside first zip pass; this entry: "+entry.getName()); - if (!entry.isDirectory()) { - String shortName = entry.getName().replaceFirst("^.*[\\/]", ""); - // ... and, finally, check if it's a "fake" file - a zip archive entry - // created for a MacOS X filesystem element: (these - // start with "._") - if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) { - numberOfUnpackableFiles++; - if (numberOfUnpackableFiles > fileNumberLimit) { - logger.warning("Zip upload - too many files in the zip to process individually."); - warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit - + "); please upload a zip archive with fewer files, if you want them to be ingested " - + "as individual DataFiles."; - throw new IOException(); - } - // In addition to counting the files, we can - // also check the file size while we're here, - // provided the size limit is defined; if a single - // file is above the individual size limit, unzipped, - // we give up on unpacking this zip archive as well: - if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) { - throw new FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), bytesToHumanReadable(fileSizeLimit))); - } - } - } - } - - // OK we're still here - that means we can proceed unzipping. - - // Close the ZipFile, re-open as ZipInputStream: - zipFile.close(); - - if (charset != null) { - unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()), charset); - } else { - unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile())); - } - - while (true) { - try { - zipEntry = unZippedIn.getNextEntry(); - } catch (IllegalArgumentException iaex) { - // Note: - // ZipInputStream documentation doesn't even mention that - // getNextEntry() throws an IllegalArgumentException! 
- // but that's what happens if the file name of the next - // entry is not valid in the current CharSet. - // -- L.A. - warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) Saving the file as is."; - logger.warning(warningMessage); - throw new IOException(); - } - - if (zipEntry == null) { - break; - } - // Note that some zip entries may be directories - we - // simply skip them: - - if (!zipEntry.isDirectory()) { - if (datafiles.size() > fileNumberLimit) { - logger.warning("Zip upload - too many files."); - warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit - + "); please upload a zip archive with fewer files, if you want them to be ingested " - + "as individual DataFiles."; - throw new IOException(); - } - - String fileEntryName = zipEntry.getName(); - logger.fine("ZipEntry, file: " + fileEntryName); - - if (fileEntryName != null && !fileEntryName.equals("")) { - - String shortName = fileEntryName.replaceFirst("^.*[\\/]", ""); - - // Check if it's a "fake" file - a zip archive entry - // created for a MacOS X filesystem element: (these - // start with "._") - if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) { - // OK, this seems like an OK file entry - we'll try - // to read it and create a DataFile with it: - - String storageIdentifier = generateStorageIdentifier(); - File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier); - Files.copy(unZippedIn, unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - // No need to check the size of this unpacked file against the size limit, - // since we've already checked for that in the first pass. - - DataFile datafile = createSingleDataFile(version, null, storageIdentifier, shortName, - MIME_TYPE_UNDETERMINED_DEFAULT, - systemConfig.getFileFixityChecksumAlgorithm(), null, false); - - if (!fileEntryName.equals(shortName)) { - // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes), - // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all - // the leading, trailing and duplicate slashes; then replace all the characters that - // don't pass our validation rules. - String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", ""); - directoryName = StringUtil.sanitizeFileDirectory(directoryName, true); - // if (!"".equals(directoryName)) { - if (!StringUtil.isEmpty(directoryName)) { - logger.fine("setting the directory label to " + directoryName); - datafile.getFileMetadata().setDirectoryLabel(directoryName); - } - } - - if (datafile != null) { - // We have created this datafile with the mime type "unknown"; - // Now that we have it saved in a temporary location, - // let's try and determine its real type: - - try { - recognizedType = determineFileType(unzippedFile, shortName); - // null the File explicitly, to release any open FDs: - unzippedFile = null; - logger.fine("File utility recognized unzipped file as " + recognizedType); - if (recognizedType != null && !recognizedType.equals("")) { - datafile.setContentType(recognizedType); - } - } catch (Exception ex) { - logger.warning("Failed to run the file utility mime type check on file " + fileName); - } - - datafiles.add(datafile); - } - } - } - } - unZippedIn.closeEntry(); - - } - - } catch (IOException ioex) { - // just clear the datafiles list and let - // ingest default to creating a single DataFile out - // of the unzipped file. 
- logger.warning("Unzipping failed; rolling back to saving the file as is."); - if (warningMessage == null) { - warningMessage = BundleUtil.getStringFromBundle("file.addreplace.warning.unzip.failed"); - } - - datafiles.clear(); - } catch (FileExceedsMaxSizeException femsx) { - logger.warning("One of the unzipped files exceeds the size limit; resorting to saving the file as is. " + femsx.getMessage()); - warningMessage = BundleUtil.getStringFromBundle("file.addreplace.warning.unzip.failed.size", Arrays.asList(FileSizeChecker.bytesToHumanReadable(fileSizeLimit))); - datafiles.clear(); - } finally { - if (zipFile != null) { - try { - zipFile.close(); - } catch (Exception zEx) {} - } - if (unZippedIn != null) { - try { - unZippedIn.close(); - } catch (Exception zEx) {} - } - } - if (!datafiles.isEmpty()) { - // remove the uploaded zip file: - try { - Files.delete(tempFile); - } catch (IOException ioex) { - // do nothing - it's just a temp file. - logger.warning("Could not remove temp file " + tempFile.getFileName().toString()); - } - // and return: - return CreateDataFileResult.success(fileName, finalType, datafiles); - } - - } else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) { - // Shape files may have to be split into multiple files, - // one zip archive per each complete set of shape files: - - // File rezipFolder = new File(this.getFilesTempDirectory()); - File rezipFolder = getShapefileUnzipTempDirectory(); - - IngestServiceShapefileHelper shpIngestHelper; - shpIngestHelper = new IngestServiceShapefileHelper(tempFile.toFile(), rezipFolder); - - boolean didProcessWork = shpIngestHelper.processFile(); - if (!(didProcessWork)) { - logger.severe("Processing of zipped shapefile failed."); - return CreateDataFileResult.error(fileName, finalType); - } - - try { - for (File finalFile : shpIngestHelper.getFinalRezippedFiles()) { - FileInputStream finalFileInputStream = new FileInputStream(finalFile); - finalType = determineContentType(finalFile); - if (finalType == null) { - logger.warning("Content type is null; but should default to 'MIME_TYPE_UNDETERMINED_DEFAULT'"); - continue; - } - - File unZippedShapeTempFile = saveInputStreamInTempFile(finalFileInputStream, fileSizeLimit); - DataFile new_datafile = createSingleDataFile(version, unZippedShapeTempFile, finalFile.getName(), finalType, systemConfig.getFileFixityChecksumAlgorithm()); - String directoryName = null; - String absolutePathName = finalFile.getParent(); - if (absolutePathName != null) { - if (absolutePathName.length() > rezipFolder.toString().length()) { - // This file lives in a subfolder - we want to - // preserve it in the FileMetadata: - directoryName = absolutePathName.substring(rezipFolder.toString().length() + 1); - - if (!StringUtil.isEmpty(directoryName)) { - new_datafile.getFileMetadata().setDirectoryLabel(directoryName); - } - } - } - if (new_datafile != null) { - datafiles.add(new_datafile); - } else { - logger.severe("Could not add part of rezipped shapefile. new_datafile was null: " + finalFile.getName()); - } - finalFileInputStream.close(); - - } - } catch (FileExceedsMaxSizeException femsx) { - logger.severe("One of the unzipped shape files exceeded the size limit; giving up. " + femsx.getMessage()); - datafiles.clear(); - } - - // Delete the temp directory used for unzipping - // The try-catch is due to error encountered in using NFS for stocking file, - // cf. 
https://github.com/IQSS/dataverse/issues/5909 - try { - FileUtils.deleteDirectory(rezipFolder); - } catch (IOException ioex) { - // do nothing - it's a tempo folder. - logger.warning("Could not remove temp folder, error message : " + ioex.getMessage()); - } - - if (datafiles.size() > 0) { - // remove the uploaded zip file: - try { - Files.delete(tempFile); - } catch (IOException ioex) { - // do nothing - it's just a temp file. - logger.warning("Could not remove temp file " + tempFile.getFileName().toString()); - } catch (SecurityException se) { - logger.warning("Unable to delete: " + tempFile.toString() + "due to Security Exception: " - + se.getMessage()); - } - return CreateDataFileResult.success(fileName, finalType, datafiles); - } else { - logger.severe("No files added from directory of rezipped shapefiles"); - } - return CreateDataFileResult.error(fileName, finalType); - - } else if (finalType.equalsIgnoreCase(BagItFileHandler.FILE_TYPE)) { - Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); - if (bagItFileHandler.isPresent()) { - CreateDataFileResult result = bagItFileHandler.get().handleBagItPackage(systemConfig, version, fileName, tempFile.toFile()); - return result; - } - } - } else { - // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied - finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; - String type = determineFileTypeByNameAndExtension(fileName); - if (!StringUtils.isBlank(type)) { - //Use rules for deciding when to trust browser supplied type - if (useRecognizedType(finalType, type)) { - finalType = type; - } - logger.fine("Supplied type: " + suppliedContentType + ", finalType: " + finalType); - } - } - // Finally, if none of the special cases above were applicable (or - // if we were unable to unpack an uploaded file, etc.), we'll just - // create and return a single DataFile: - File newFile = null; - if (tempFile != null) { - newFile = tempFile.toFile(); - } - - - DataFile datafile = createSingleDataFile(version, newFile, newStorageIdentifier, fileName, finalType, newCheckSumType, newCheckSum); - File f = null; - if (tempFile != null) { - f = tempFile.toFile(); - } - if (datafile != null && ((f != null) || (newStorageIdentifier != null))) { - - if (warningMessage != null) { - createIngestFailureReport(datafile, warningMessage); - datafile.SetIngestProblem(); - } - datafiles.add(datafile); - - return CreateDataFileResult.success(fileName, finalType, datafiles); - } - - return CreateDataFileResult.error(fileName, finalType); - } // end createDataFiles - public static boolean useRecognizedType(String suppliedContentType, String recognizedType) { // is it any better than the type that was supplied to us, @@ -1325,7 +834,12 @@ public static boolean useRecognizedType(String suppliedContentType, String recog } public static File saveInputStreamInTempFile(InputStream inputStream, Long fileSizeLimit) - throws IOException, FileExceedsMaxSizeException { + throws IOException, FileExceedsMaxSizeException, FileExceedsStorageQuotaException { + return saveInputStreamInTempFile(inputStream, fileSizeLimit, null); + } + + public static File saveInputStreamInTempFile(InputStream inputStream, Long fileSizeLimit, Long storageQuotaLimit) + throws IOException, FileExceedsMaxSizeException, FileExceedsStorageQuotaException { Path tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload"); if 
(inputStream != null && tempFile != null) { @@ -1336,7 +850,12 @@ public static File saveInputStreamInTempFile(InputStream inputStream, Long fileS Long fileSize = tempFile.toFile().length(); if (fileSizeLimit != null && fileSize > fileSizeLimit) { try {tempFile.toFile().delete();} catch (Exception ex) {} - throw new FileExceedsMaxSizeException (MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit))); + throw new FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit))); + } + + if (storageQuotaLimit != null && fileSize > storageQuotaLimit) { + try {tempFile.toFile().delete();} catch (Exception ex) {} + throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(fileSize), bytesToHumanReadable(storageQuotaLimit))); } return tempFile.toFile(); @@ -1379,7 +898,6 @@ public static DataFile createSingleDataFile(DatasetVersion version, File tempFil datafile.setPermissionModificationTime(new Timestamp(new Date().getTime())); FileMetadata fmd = new FileMetadata(); - // TODO: add directoryLabel? fmd.setLabel(fileName); if (addToDataset) { @@ -1426,7 +944,7 @@ public static DataFile createSingleDataFile(DatasetVersion version, File tempFil Naming convention: getFilesTempDirectory() + "shp_" + "yyyy-MM-dd-hh-mm-ss-SSS" */ - private static File getShapefileUnzipTempDirectory(){ + public static File getShapefileUnzipTempDirectory(){ String tempDirectory = getFilesTempDirectory(); if (tempDirectory == null){ @@ -1490,25 +1008,17 @@ public static boolean canIngestAsTabular(String mimeType) { } } + /** + * Return the location where data should be stored temporarily after uploading (UI or API) + * for local processing (ingest, unzip, ...) and transfer to final destination (see storage subsystem). + * + * This location is checked to be configured, does exist, and is writeable via + * {@link ConfigCheckService#checkSystemDirectories()}. + * + * @return String with a path to the temporary location. Will not be null (former versions did to indicate failure) + */ public static String getFilesTempDirectory() { - - String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); - String filesTempDirectory = filesRootDirectory + "/temp"; - - if (!Files.exists(Paths.get(filesTempDirectory))) { - /* Note that "createDirectories()" must be used - not - * "createDirectory()", to make sure all the parent - * directories that may not yet exist are created as well. - */ - try { - Files.createDirectories(Paths.get(filesTempDirectory)); - } catch (IOException ex) { - logger.severe("Failed to create filesTempDirectory: " + filesTempDirectory ); - return null; - } - } - - return filesTempDirectory; + return JvmSettings.FILES_DIRECTORY.lookup() + File.separator + "temp"; } public static void generateS3PackageStorageIdentifier(DataFile dataFile) { @@ -1610,6 +1120,11 @@ public static boolean isRequestAccessPopupRequired(DatasetVersion datasetVersion if (answer != null) { return answer; } + // 3. 
Guest Book: + if (datasetVersion.getDataset() != null && datasetVersion.getDataset().getGuestbook() != null && datasetVersion.getDataset().getGuestbook().isEnabled() && datasetVersion.getDataset().getGuestbook().getDataverse() != null) { + logger.fine("Request access popup required because of guestbook."); + return true; + } logger.fine("Request access popup is not required."); return false; } @@ -1651,6 +1166,49 @@ private static Boolean popupDueToStateOrTerms(DatasetVersion datasetVersion) { return null; } + /** + * isGuestbookAndTermsPopupRequired + * meant to replace both isDownloadPopupRequired() and isRequestAccessDownloadPopupRequired() when the guestbook-terms-popup-fragment.xhtml + * replaced file-download-popup-fragment.xhtml and file-request-access-popup-fragment.xhtml + * @param datasetVersion + * @return boolean + */ + + public static boolean isGuestbookAndTermsPopupRequired(DatasetVersion datasetVersion) { + return isGuestbookPopupRequired(datasetVersion) || isTermsPopupRequired(datasetVersion); + } + + public static boolean isGuestbookPopupRequired(DatasetVersion datasetVersion) { + + if (datasetVersion == null) { + logger.fine("GuestbookPopup not required because datasetVersion is null."); + return false; + } + //0. if version is draft then Popup "not required" + if (!datasetVersion.isReleased()) { + logger.fine("GuestbookPopup not required because datasetVersion has not been released."); + return false; + } + + // 3. Guest Book: + if (datasetVersion.getDataset() != null && datasetVersion.getDataset().getGuestbook() != null && datasetVersion.getDataset().getGuestbook().isEnabled() && datasetVersion.getDataset().getGuestbook().getDataverse() != null) { + logger.fine("GuestbookPopup required because an enabled guestbook exists."); + return true; + } + + logger.fine("GuestbookPopup is not required."); + return false; + } + + public static boolean isTermsPopupRequired(DatasetVersion datasetVersion) { + Boolean answer = popupDueToStateOrTerms(datasetVersion); + if(answer == null) { + logger.fine("TermsPopup is not required."); + return false; + } + return answer; + } + /** * Provide download URL if no Terms of Use, no guestbook, and not * restricted. 
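isGuestbookAndTermsPopupRequired() above is documented as the eventual replacement for the two older popup checks, with guestbook-terms-popup-fragment.xhtml as the corresponding fragment. A small sketch of how a page backing bean might branch on the new helpers; only the FileUtil calls come from this changeset, the surrounding class is invented:

    import edu.harvard.iq.dataverse.DatasetVersion;
    import edu.harvard.iq.dataverse.util.FileUtil;

    public class DownloadPopupSketch {
        public String popupFragmentToRender(DatasetVersion workingVersion) {
            boolean guestbook = FileUtil.isGuestbookPopupRequired(workingVersion);
            boolean terms = FileUtil.isTermsPopupRequired(workingVersion);
            if (guestbook || terms) {
                // equivalent to FileUtil.isGuestbookAndTermsPopupRequired(workingVersion)
                return "guestbook-terms-popup-fragment.xhtml";
            }
            return null; // no popup needed; the download can start directly
        }
    }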
@@ -1871,6 +1429,17 @@ public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) { return s3io; } + private static InputStream getOriginalFileInputStream(StorageIO storage, boolean isTabularData) throws IOException { + storage.open(DataAccessOption.READ_ACCESS); + if (!isTabularData) { + return storage.getInputStream(); + } else { + // if this is a tabular file, read the preserved original "auxiliary file" + // instead: + return storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); + } + } + public static void validateDataFileChecksum(DataFile dataFile) throws IOException { DataFile.ChecksumType checksumType = dataFile.getChecksumType(); if (checksumType == null) { @@ -1880,35 +1449,24 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio } StorageIO storage = dataFile.getStorageIO(); - InputStream in = null; - - try { - storage.open(DataAccessOption.READ_ACCESS); + String recalculatedChecksum = null; - if (!dataFile.isTabularData()) { - in = storage.getInputStream(); - } else { - // if this is a tabular file, read the preserved original "auxiliary file" - // instead: - in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); - } + try (InputStream inputStream = getOriginalFileInputStream(storage, dataFile.isTabularData())) { + recalculatedChecksum = FileUtil.calculateChecksum(inputStream, checksumType); } catch (IOException ioex) { - in = null; - } - - if (in == null) { String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString())); logger.log(Level.INFO, info); throw new IOException(info); - } - - String recalculatedChecksum = null; - try { - recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); } catch (RuntimeException rte) { + logger.log(Level.SEVERE, "failed to calculated checksum, one retry", rte); recalculatedChecksum = null; - } finally { - IOUtils.closeQuietly(in); + } + + if (recalculatedChecksum == null) { //retry once + storage = dataFile.getStorageIO(); + try (InputStream inputStream = getOriginalFileInputStream(storage, dataFile.isTabularData())) { + recalculatedChecksum = FileUtil.calculateChecksum(inputStream, checksumType); + } } if (recalculatedChecksum == null) { @@ -1926,19 +1484,12 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio boolean fixed = false; if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) { // try again, see if the .orig file happens to be there: - try { - in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); - } catch (IOException ioex) { - in = null; + try (InputStream in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION)) { + recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); + } catch (RuntimeException rte) { + recalculatedChecksum = null; } - if (in != null) { - try { - recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); - } catch (RuntimeException rte) { - recalculatedChecksum = null; - } finally { - IOUtils.closeQuietly(in); - } + if (recalculatedChecksum != null) { // try again: if (recalculatedChecksum.equals(dataFile.getChecksumValue())) { fixed = true; @@ -2162,7 +1713,7 @@ private static String getFileAccessUrl(FileMetadata fileMetadata, String apiLoca private static String getFolderAccessUrl(DatasetVersion version, String currentFolder, String subFolder, String apiLocation, boolean originals) { String datasetId = 
version.getDataset().getId().toString(); String versionTag = version.getFriendlyVersionNumber(); - versionTag = versionTag.replace("DRAFT", ":draft"); + versionTag = versionTag.replace("DRAFT", DS_VERSION_DRAFT); if (!"".equals(currentFolder)) { subFolder = currentFolder + "/" + subFolder; } @@ -2226,5 +1777,11 @@ public static boolean isActivelyEmbargoed(List fmdList) { } return false; } + + + public static String getStorageDriver(DataFile dataFile) { + String storageIdentifier = dataFile.getStorageIdentifier(); + return storageIdentifier.substring(0, storageIdentifier.indexOf(DataAccess.SEPARATOR)); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java index dcb6e078df6..0724e53700b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.util; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.UserNotification; @@ -39,6 +40,8 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti datasetDisplayName = ((Dataset) objectOfNotification).getDisplayName(); } else if (objectOfNotification instanceof DatasetVersion) { datasetDisplayName = ((DatasetVersion) objectOfNotification).getDataset().getDisplayName(); + } else if (objectOfNotification instanceof DataFile) { + datasetDisplayName = ((DataFile) objectOfNotification).getOwner().getDisplayName(); } } @@ -50,7 +53,9 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti case CREATEDV: return BundleUtil.getStringFromBundle("notification.email.create.dataverse.subject", rootDvNameAsList); case REQUESTFILEACCESS: - return BundleUtil.getStringFromBundle("notification.email.request.file.access.subject", rootDvNameAsList); + return BundleUtil.getStringFromBundle("notification.email.request.file.access.subject", Arrays.asList(rootDvNameAsList.get(0), datasetDisplayName)); + case REQUESTEDFILEACCESS: + return BundleUtil.getStringFromBundle("notification.email.requested.file.access.subject", Arrays.asList(rootDvNameAsList.get(0), datasetDisplayName)); case GRANTFILEACCESS: return BundleUtil.getStringFromBundle("notification.email.grant.file.access.subject", rootDvNameAsList); case REJECTFILEACCESS: diff --git a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java index 3af562882f3..9786fda4217 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java @@ -72,7 +72,7 @@ public class ShapefileHandler{ public final static List SHAPEFILE_MANDATORY_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj"); public final static String SHP_XML_EXTENSION = "shp.xml"; public final static String BLANK_EXTENSION = "__PLACEHOLDER-FOR-BLANK-EXTENSION__"; - public final static List SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", SHP_XML_EXTENSION); + public final static List SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", "qpj", "qmd", SHP_XML_EXTENSION); public boolean DEBUG = false; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java index 2c9b7167059..b6f8870aa2d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java @@ -19,6 +19,8 @@ Two configurable options allow changing the limit for the number of authors or d import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObjectBuilder; +import org.apache.commons.validator.routines.UrlValidator; + import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -71,14 +73,14 @@ public String getLinks() { String describedby = "<" + ds.getGlobalId().asURL().toString() + ">;rel=\"describedby\"" + ";type=\"" + "application/vnd.citationstyles.csl+json\""; describedby += ",<" + systemConfig.getDataverseSiteUrl() + "/api/datasets/export?exporter=schema.org&persistentId=" - + ds.getProtocol() + ":" + ds.getAuthority() + "/" + ds.getIdentifier() + ">;rel=\"describedby\"" + ";type=\"application/json+ld\""; + + ds.getProtocol() + ":" + ds.getAuthority() + "/" + ds.getIdentifier() + ">;rel=\"describedby\"" + ";type=\"application/ld+json\""; valueList.add(describedby); String type = ";rel=\"type\""; type = ";rel=\"type\",<" + defaultFileTypeValue + ">;rel=\"type\""; valueList.add(type); - String licenseString = DatasetUtil.getLicenseURI(workingDatasetVersion) + ";rel=\"license\""; + String licenseString = "<" + DatasetUtil.getLicenseURI(workingDatasetVersion) + ">;rel=\"license\""; valueList.add(licenseString); String linkset = "<" + systemConfig.getDataverseSiteUrl() + "/api/datasets/:persistentId/versions/" @@ -116,7 +118,7 @@ public JsonArrayBuilder getJsonLinkset() { systemConfig.getDataverseSiteUrl() + "/api/datasets/export?exporter=schema.org&persistentId=" + ds.getProtocol() + ":" + ds.getAuthority() + "/" + ds.getIdentifier() ).add( "type", - "application/json+ld" + "application/ld+json" ) ); JsonArrayBuilder linksetJsonObj = Json.createArrayBuilder(); @@ -164,12 +166,11 @@ private List getAuthorURLs(boolean limit) { for (DatasetAuthor da : workingDatasetVersion.getDatasetAuthors()) { logger.fine(String.format("idtype: %s; idvalue: %s, affiliation: %s; identifierUrl: %s", da.getIdType(), da.getIdValue(), da.getAffiliation(), da.getIdentifierAsUrl())); - String authorURL = ""; - authorURL = getAuthorUrl(da); + String authorURL = getAuthorUrl(da); if (authorURL != null && !authorURL.isBlank()) { // return empty if number of visible author more than max allowed // >= since we're comparing before incrementing visibleAuthorCounter - if (visibleAuthorCounter >= maxAuthors) { + if (limit && visibleAuthorCounter >= maxAuthors) { authorURLs.clear(); break; } @@ -211,15 +212,22 @@ private String getAuthorsAsString(List datasetAuthorURLs) { * */ private String getAuthorUrl(DatasetAuthor da) { - String authorURL = ""; - //If no type and there's a value, assume it is a URL (is this reasonable?) 
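The rewritten getAuthorUrl() below prefers DatasetAuthor#getIdentifierAsUrl() and only falls back to the raw identifier value when commons-validator accepts it as an http(s) URL, returning null otherwise. A quick, self-contained illustration of that validator behaviour; the identifier values are examples only:

    import org.apache.commons.validator.routines.UrlValidator;

    public class AuthorUrlSketch {
        public static void main(String[] args) {
            // Same scheme restriction as the rewritten getAuthorUrl():
            UrlValidator urlValidator = new UrlValidator(new String[] {"http", "https"});
            System.out.println(urlValidator.isValid("https://orcid.org/0000-0002-1825-0097")); // true
            System.out.println(urlValidator.isValid("0000-0002-1825-0097"));                   // false, so no author URL is emitted
        }
    }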
- //Otherise, get the URL using the type and value - if (da.getIdType() != null && !da.getIdType().isBlank() && da.getIdValue()!=null) { - authorURL = da.getIdValue(); - } else { - authorURL = da.getIdentifierAsUrl(); + + final String identifierAsUrl = da.getIdentifierAsUrl(); + // First, try to get URL using the type and value + if(identifierAsUrl != null) { + return identifierAsUrl; } - return authorURL; + + final String idValue = da.getIdValue(); + UrlValidator urlValidator = new UrlValidator(new String[]{"http", "https"}); + // Otherwise, try to use idValue as url if it's valid + if(urlValidator.isValid(idValue)) { + return idValue; + } + + // No url found + return null; } private JsonArrayBuilder getJsonAuthors(List datasetAuthorURLs) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java index 33c87563104..137ae21d793 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java @@ -2,9 +2,13 @@ import edu.harvard.iq.dataverse.authorization.providers.oauth2.OAuth2LoginBackingBean; import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.security.InvalidAlgorithmParameterException; import java.security.InvalidKeyException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.security.SecureRandom; +import java.security.spec.AlgorithmParameterSpec; import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; @@ -19,6 +23,7 @@ import javax.crypto.Cipher; import javax.crypto.IllegalBlockSizeException; import javax.crypto.NoSuchPaddingException; +import javax.crypto.spec.GCMParameterSpec; import javax.crypto.spec.SecretKeySpec; import org.apache.commons.lang3.StringUtils; @@ -117,6 +122,10 @@ public static List htmlArray2textArray(List htmlArray) { return cleanTextArray; } + private final static SecureRandom secureRandom = new SecureRandom(); + // 12 bytes is recommended by GCM spec + private final static int GCM_IV_LENGTH = 12; + /** * Generates an AES-encrypted version of the string. Resultant string is URL safe. * @param value The value to encrypt. @@ -124,19 +133,26 @@ public static List htmlArray2textArray(List htmlArray) { * @return encrypted string, URL-safe. 
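To make the rewritten getAuthorUrl logic above easier to follow (prefer the URL derived from the identifier type and value, otherwise accept the raw value only when it is a valid http/https URL), here is a hedged standalone sketch using Apache Commons Validator. The two String parameters are simplified stand-ins for the real DatasetAuthor accessors:

import org.apache.commons.validator.routines.UrlValidator;

public class AuthorUrlExample {

    // identifierAsUrl: the URL already resolved from the identifier type + value (may be null).
    // idValue: the raw identifier value as entered.
    static String getAuthorUrl(String identifierAsUrl, String idValue) {
        // First choice: the URL derived from the identifier type and value (e.g. an ORCID URL).
        if (identifierAsUrl != null) {
            return identifierAsUrl;
        }
        // Otherwise accept the raw value only if it is itself a valid http(s) URL.
        UrlValidator urlValidator = new UrlValidator(new String[]{"http", "https"});
        if (idValue != null && urlValidator.isValid(idValue)) {
            return idValue;
        }
        // No usable URL found.
        return null;
    }

    public static void main(String[] args) {
        System.out.println(getAuthorUrl("https://orcid.org/0000-0002-1825-0097", null)); // resolved URL wins
        System.out.println(getAuthorUrl(null, "https://example.org/people/jdoe"));       // valid URL, returned
        System.out.println(getAuthorUrl(null, "not-a-url"));                             // null
    }
}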
*/ public static String encrypt(String value, String password ) { + byte[] baseBytes = value.getBytes(); try { - Cipher aes = Cipher.getInstance("AES"); + byte[] iv = new byte[GCM_IV_LENGTH]; //NEVER REUSE THIS IV WITH SAME KEY + secureRandom.nextBytes(iv); + Cipher aes = Cipher.getInstance("AES/GCM/NoPadding"); final SecretKeySpec secretKeySpec = generateKeyFromString(password); - aes.init(Cipher.ENCRYPT_MODE, secretKeySpec); + GCMParameterSpec parameterSpec = new GCMParameterSpec(128, iv); + aes.init(Cipher.ENCRYPT_MODE, secretKeySpec, parameterSpec); byte[] encrypted = aes.doFinal(baseBytes); - String base64ed = new String(Base64.getEncoder().encode(encrypted)); + ByteBuffer byteBuffer = ByteBuffer.allocate(iv.length + encrypted.length); + byteBuffer.put(iv); + byteBuffer.put(encrypted); + String base64ed = new String(Base64.getEncoder().encode(byteBuffer.array())); return base64ed.replaceAll("\\+", ".") .replaceAll("=", "-") .replaceAll("/", "_"); } catch ( InvalidKeyException | NoSuchAlgorithmException | BadPaddingException - | IllegalBlockSizeException | NoSuchPaddingException | UnsupportedEncodingException ex) { + | IllegalBlockSizeException | NoSuchPaddingException | UnsupportedEncodingException | InvalidAlgorithmParameterException ex) { Logger.getLogger(OAuth2LoginBackingBean.class.getName()).log(Level.SEVERE, null, ex); throw new RuntimeException(ex); } @@ -149,13 +165,15 @@ public static String decrypt(String value, String password ) { byte[] baseBytes = Base64.getDecoder().decode(base64); try { - Cipher aes = Cipher.getInstance("AES"); - aes.init( Cipher.DECRYPT_MODE, generateKeyFromString(password)); - byte[] decrypted = aes.doFinal(baseBytes); + Cipher aes = Cipher.getInstance("AES/GCM/NoPadding"); + //use first 12 bytes for iv + AlgorithmParameterSpec gcmIv = new GCMParameterSpec(128, baseBytes, 0, GCM_IV_LENGTH); + aes.init( Cipher.DECRYPT_MODE, generateKeyFromString(password),gcmIv); + byte[] decrypted = aes.doFinal(baseBytes,GCM_IV_LENGTH, baseBytes.length - GCM_IV_LENGTH); return new String(decrypted); } catch ( InvalidKeyException | NoSuchAlgorithmException | BadPaddingException - | IllegalBlockSizeException | NoSuchPaddingException | UnsupportedEncodingException ex) { + | IllegalBlockSizeException | NoSuchPaddingException | UnsupportedEncodingException | InvalidAlgorithmParameterException ex) { Logger.getLogger(OAuth2LoginBackingBean.class.getName()).log(Level.SEVERE, null, ex); throw new RuntimeException(ex); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 4fed3a05976..3f1ec3dd7eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -893,7 +893,7 @@ public String toString() { } } - + public boolean isPublicInstall(){ boolean saneDefault = false; return settingsService.isTrueForKey(SettingsServiceBean.Key.PublicInstall, saneDefault); @@ -941,18 +941,13 @@ public boolean isHTTPDownload() { } public boolean isGlobusDownload() { - return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), false); + return getMethodAvailable(FileDownloadMethods.GLOBUS.toString(), false); } public boolean isGlobusFileDownload() { return (isGlobusDownload() && settingsService.isTrueForKey(SettingsServiceBean.Key.GlobusSingleFileTransfer, false)); } - public List getGlobusStoresList() { - String globusStores = settingsService.getValueForKey(SettingsServiceBean.Key.GlobusStores, ""); - return 
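The StringUtil change above swaps the bare "AES" transformation for AES/GCM with a fresh random 12-byte IV that is prepended to the ciphertext, so decryption can recover it from the first bytes. A minimal sketch of that pattern, assuming the caller already has a raw 128-bit key (the real code derives the key from a password and also makes the Base64 output URL-safe):

import java.nio.ByteBuffer;
import java.security.SecureRandom;
import javax.crypto.Cipher;
import javax.crypto.spec.GCMParameterSpec;
import javax.crypto.spec.SecretKeySpec;

public class GcmExample {

    private static final SecureRandom RANDOM = new SecureRandom();
    private static final int GCM_IV_LENGTH = 12;   // 12 bytes, as recommended for GCM
    private static final int GCM_TAG_BITS = 128;

    static byte[] encrypt(byte[] plaintext, SecretKeySpec key) throws Exception {
        byte[] iv = new byte[GCM_IV_LENGTH];        // never reuse an IV with the same key
        RANDOM.nextBytes(iv);
        Cipher aes = Cipher.getInstance("AES/GCM/NoPadding");
        aes.init(Cipher.ENCRYPT_MODE, key, new GCMParameterSpec(GCM_TAG_BITS, iv));
        byte[] ciphertext = aes.doFinal(plaintext);
        // Ship IV + ciphertext together so decrypt() can read the IV back out.
        return ByteBuffer.allocate(iv.length + ciphertext.length).put(iv).put(ciphertext).array();
    }

    static byte[] decrypt(byte[] ivAndCiphertext, SecretKeySpec key) throws Exception {
        Cipher aes = Cipher.getInstance("AES/GCM/NoPadding");
        // The first 12 bytes are the IV; the remainder is ciphertext plus the GCM auth tag.
        aes.init(Cipher.DECRYPT_MODE, key, new GCMParameterSpec(GCM_TAG_BITS, ivAndCiphertext, 0, GCM_IV_LENGTH));
        return aes.doFinal(ivAndCiphertext, GCM_IV_LENGTH, ivAndCiphertext.length - GCM_IV_LENGTH);
    }

    public static void main(String[] args) throws Exception {
        SecretKeySpec key = new SecretKeySpec(new byte[16], "AES"); // all-zero key, for demonstration only
        byte[] wrapped = encrypt("hello".getBytes(), key);
        System.out.println(new String(decrypt(wrapped, key)));      // prints "hello"
    }
}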
Arrays.asList(globusStores.split("\\s*,\\s*")); - } - private Boolean getMethodAvailable(String method, boolean upload) { String methods = settingsService.getValueForKey( upload ? SettingsServiceBean.Key.UploadMethods : SettingsServiceBean.Key.DownloadMethods); @@ -971,25 +966,6 @@ public Integer getUploadMethodCount(){ return Arrays.asList(uploadMethods.toLowerCase().split("\\s*,\\s*")).size(); } } - public boolean isDataFilePIDSequentialDependent(){ - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); - String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); - if (doiIdentifierType.equals("storedProcGenerated") && doiDataFileFormat.equals("DEPENDENT")){ - return true; - } - return false; - } - - public int getPIDAsynchRegFileCount() { - String fileCount = settingsService.getValueForKey(SettingsServiceBean.Key.PIDAsynchRegFileCount, "10"); - int retVal = 10; - try { - retVal = Integer.parseInt(fileCount); - } catch (NumberFormatException e) { - //if no number in the setting we'll return 10 - } - return retVal; - } public boolean isAllowCustomTerms() { boolean safeDefaultIfKeyNotFound = true; @@ -1021,16 +997,7 @@ public boolean isFilePIDsEnabledForCollection(Dataverse collection) { return thisCollection.getFilePIDsEnabled(); } - public boolean isIndependentHandleService() { - boolean safeDefaultIfKeyNotFound = false; - return settingsService.isTrueForKey(SettingsServiceBean.Key.IndependentHandleService, safeDefaultIfKeyNotFound); - - } - - public String getHandleAuthHandle() { - String handleAuthHandle = settingsService.getValueForKey(SettingsServiceBean.Key.HandleAuthHandle, null); - return handleAuthHandle; - } + public String getMDCLogPath() { String mDCLogPath = settingsService.getValueForKey(SettingsServiceBean.Key.MDCLogPath, null); @@ -1164,4 +1131,26 @@ public boolean isSignupDisabledForRemoteAuthProvider(String providerId) { return !ret; } + + public boolean isStorageQuotasEnforced() { + return settingsService.isTrueForKey(SettingsServiceBean.Key.UseStorageQuotas, false); + } + + /** + * This method should only be used for testing of the new storage quota + * mechanism, temporarily. (it uses the same value as the quota for + * *everybody* regardless of the circumstances, defined as a database + * setting) + */ + public Long getTestStorageQuotaLimit() { + return settingsService.getValueForKeyAsLong(SettingsServiceBean.Key.StorageQuotaSizeInBytes); + } + /** + * Should we store tab-delimited files produced during ingest *with* the + * variable name header line included? + * @return boolean - defaults to false. 
+ */ + public boolean isStoringIngestedFilesWithHeaders() { + return settingsService.isTrueForKey(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders, false); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index 4ae76a7b8db..a3293e0cd28 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -6,6 +6,10 @@ import java.util.regex.Pattern; import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonValue; import edu.harvard.iq.dataverse.DataFile; @@ -13,6 +17,10 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + +import static edu.harvard.iq.dataverse.api.ApiConstants.DS_VERSION_DRAFT; public class URLTokenUtil { @@ -22,6 +30,13 @@ public class URLTokenUtil { protected final FileMetadata fileMetadata; protected ApiToken apiToken; protected String localeCode; + + + public static final String HTTP_METHOD="httpMethod"; + public static final String TIMEOUT="timeOut"; + public static final String SIGNED_URL="signedUrl"; + public static final String NAME="name"; + public static final String URL_TEMPLATE="urlTemplate"; /** * File level @@ -177,8 +192,7 @@ private String getTokenValue(String value) { } } if (("DRAFT").equals(versionString)) { - versionString = ":draft"; // send the token needed in api calls that can be substituted for a numeric - // version. + versionString = DS_VERSION_DRAFT; // send the token needed in api calls that can be substituted for a numeric version. } return versionString; case FILE_METADATA_ID: @@ -193,6 +207,58 @@ private String getTokenValue(String value) { throw new IllegalArgumentException("Cannot replace reserved word: " + value); } + public JsonObjectBuilder createPostBody(JsonObject params, JsonArray allowedApiCalls) { + JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); + bodyBuilder.add("queryParameters", params); + if (allowedApiCalls != null && !allowedApiCalls.isEmpty()) { + JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); + allowedApiCalls.getValuesAs(JsonObject.class).forEach(((apiObj) -> { + logger.fine(JsonUtil.prettyPrint(apiObj)); + String name = apiObj.getJsonString(NAME).getString(); + String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString(); + int timeout = apiObj.getInt(TIMEOUT); + String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString(); + logger.fine("URL Template: " + urlTemplate); + urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; + String apiPath = replaceTokensWithValues(urlTemplate); + logger.fine("URL WithTokens: " + apiPath); + String url = apiPath; + // Sign if apiToken exists, otherwise send unsigned URL (i.e. 
for guest users) + ApiToken apiToken = getApiToken(); + if (apiToken != null) { + url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), + httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + + getApiToken().getTokenString()); + } + logger.fine("Signed URL: " + url); + apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod) + .add(SIGNED_URL, url).add(TIMEOUT, timeout)); + })); + bodyBuilder.add("signedUrls", apisBuilder); + } + return bodyBuilder; + } + + public JsonObject getParams(JsonObject toolParameters) { + //ToDo - why an array of object each with a single key/value pair instead of one object? + JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); + + // ToDo return json and print later + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); + if (!(queryParams == null) && !queryParams.isEmpty()) { + queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { + queryParam.keySet().forEach((key) -> { + String value = queryParam.getString(key); + JsonValue param = getParam(value); + if (param != null) { + paramsBuilder.add(key, param); + } + }); + }); + } + return paramsBuilder.build(); + } + public static String getScriptForUrl(String url) { String msg = BundleUtil.getStringFromBundle("externaltools.enable.browser.popups"); String script = "const newWin = window.open('" + url + "', target='_blank'); if (!newWin || newWin.closed || typeof newWin.closed == \"undefined\") {alert(\"" + msg + "\");}"; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index baba1a0cb43..b7c44014b80 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -74,7 +74,9 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; +import java.util.Optional; public class BagGenerator { @@ -822,17 +824,20 @@ private String generateInfoFile() { logger.warning("No contact info available for BagIt Info file"); } - info.append("Source-Organization: " + BundleUtil.getStringFromBundle("bagit.sourceOrganization")); + String orgName = JvmSettings.BAGIT_SOURCE_ORG_NAME.lookupOptional(String.class).orElse("Dataverse Installation ()"); + String orgAddress = JvmSettings.BAGIT_SOURCEORG_ADDRESS.lookupOptional(String.class).orElse(""); + String orgEmail = JvmSettings.BAGIT_SOURCEORG_EMAIL.lookupOptional(String.class).orElse(""); + + info.append("Source-Organization: " + orgName); // ToDo - make configurable info.append(CRLF); - info.append("Organization-Address: " + WordUtils.wrap( - BundleUtil.getStringFromBundle("bagit.sourceOrganizationAddress"), 78, CRLF + " ", true)); + info.append("Organization-Address: " + WordUtils.wrap(orgAddress, 78, CRLF + " ", true)); + info.append(CRLF); // Not a BagIt standard name - info.append( - "Organization-Email: " + BundleUtil.getStringFromBundle("bagit.sourceOrganizationEmail")); + info.append("Organization-Email: " + orgEmail); info.append(CRLF); info.append("External-Description: "); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index b3995b5957e..aa653a6e360 100644 
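The getParams method added to URLTokenUtil above collapses the external tool's queryParameters array, which holds one single-entry object per key/value pair, into a flat JSON object before token substitution. A rough sketch of just that reshaping step with jakarta.json; the reserved-word replacement performed by replaceTokensWithValues is deliberately left out, and the sample keys are invented:

import jakarta.json.Json;
import jakarta.json.JsonArray;
import jakarta.json.JsonObject;
import jakarta.json.JsonObjectBuilder;

public class QueryParamsExample {

    // Collapses e.g. [{"datasetPid":"{datasetPid}"},{"locale":"{localeCode}"}]
    // into {"datasetPid":"{datasetPid}","locale":"{localeCode}"} (no token replacement here).
    static JsonObject flatten(JsonArray queryParams) {
        JsonObjectBuilder paramsBuilder = Json.createObjectBuilder();
        if (queryParams != null && !queryParams.isEmpty()) {
            queryParams.getValuesAs(JsonObject.class).forEach(queryParam ->
                queryParam.keySet().forEach(key ->
                    paramsBuilder.add(key, queryParam.getString(key))));
        }
        return paramsBuilder.build();
    }

    public static void main(String[] args) {
        JsonArray params = Json.createArrayBuilder()
                .add(Json.createObjectBuilder().add("datasetPid", "{datasetPid}"))
                .add(Json.createObjectBuilder().add("locale", "{localeCode}"))
                .build();
        System.out.println(flatten(params));
    }
}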
--- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.Embargo; @@ -39,13 +40,31 @@ import org.apache.commons.lang3.exception.ExceptionUtils; +/** + * This class is used to generate a JSON-LD representation of a Dataverse object leveraging the OAI_ORE and other community vocabularies. As of v1.0.0, + * the format is being versioned and ANY CHANGES TO THE OUTPUT of this class must be reflected in a version increment (see DATAVERSE_ORE_FORMAT_VERSION). + * + * The OREMap class is intended to record ALL the information needed to recreate an existing Dataverse dataset. As of v1.0.0, this is true with the + * exception that auxiliary files are not referenced in the OREMap. While many types of auxiliary files will be regenerated automatically based on datafile + * contents, Dataverse now allows manually uploaded auxiliary files and these cannot be reproduced solely from the dataset/datafile contents. + */ public class OREMap { + //Required Services static SettingsServiceBean settingsService; static DatasetFieldServiceBean datasetFieldService; + static SystemConfig systemConfig; + private static final Logger logger = Logger.getLogger(OREMap.class.getCanonicalName()); public static final String NAME = "OREMap"; + + //NOTE: Update this value whenever the output of this class is changed + private static final String DATAVERSE_ORE_FORMAT_VERSION = "Dataverse OREMap Format v1.0.0"; + private static final String DATAVERSE_SOFTWARE_NAME = "Dataverse"; + private static final String DATAVERSE_SOFTWARE_URL = "https://github.com/iqss/dataverse"; + + private Map localContext = new TreeMap(); private DatasetVersion version; private Boolean excludeEmail = null; @@ -114,6 +133,18 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) { .add(JsonLDTerm.schemaOrg("name").getLabel(), version.getTitle()) .add(JsonLDTerm.schemaOrg("dateModified").getLabel(), version.getLastUpdateTime().toString()); addIfNotNull(aggBuilder, JsonLDTerm.schemaOrg("datePublished"), dataset.getPublicationDateFormattedYYYYMMDD()); + //Add version state info - DRAFT, RELEASED, DEACCESSIONED, ARCHIVED with extra info for DEACCESIONED + VersionState vs = version.getVersionState(); + if(vs.equals(VersionState.DEACCESSIONED)) { + JsonObjectBuilder deaccBuilder = Json.createObjectBuilder(); + deaccBuilder.add(JsonLDTerm.schemaOrg("name").getLabel(), vs.name()); + deaccBuilder.add(JsonLDTerm.DVCore("reason").getLabel(), version.getVersionNote()); + addIfNotNull(deaccBuilder, JsonLDTerm.DVCore("forwardUrl"), version.getArchiveNote()); + aggBuilder.add(JsonLDTerm.schemaOrg("creativeWorkStatus").getLabel(), deaccBuilder); + + } else { + aggBuilder.add(JsonLDTerm.schemaOrg("creativeWorkStatus").getLabel(), vs.name()); + } TermsOfUseAndAccess terms = version.getTermsOfUseAndAccess(); if (terms.getLicense() != null) { @@ -269,10 +300,23 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) { return aggBuilder.add("@context", contextBuilder.build()); } else { // Now create the overall map object with it's metadata + + //Start with a reference to the Dataverse software + JsonObjectBuilder 
dvSoftwareBuilder = Json.createObjectBuilder() + .add("@type", JsonLDTerm.schemaOrg("SoftwareApplication").getLabel()) + .add(JsonLDTerm.schemaOrg("name").getLabel(), DATAVERSE_SOFTWARE_NAME) + .add(JsonLDTerm.schemaOrg("version").getLabel(), systemConfig.getVersion(true)) + .add(JsonLDTerm.schemaOrg("url").getLabel(), DATAVERSE_SOFTWARE_URL); + + //Now the OREMAP object itself JsonObjectBuilder oremapBuilder = Json.createObjectBuilder() .add(JsonLDTerm.dcTerms("modified").getLabel(), LocalDate.now().toString()) .add(JsonLDTerm.dcTerms("creator").getLabel(), BrandingUtil.getInstallationBrandName()) .add("@type", JsonLDTerm.ore("ResourceMap").getLabel()) + //Add the version of our ORE format used + .add(JsonLDTerm.schemaOrg("additionalType").getLabel(), DATAVERSE_ORE_FORMAT_VERSION) + //Indicate which Dataverse version created it + .add(JsonLDTerm.DVCore("generatedBy").getLabel(), dvSoftwareBuilder) // Define an id for the map itself (separate from the @id of the dataset being // described .add("@id", @@ -467,8 +511,10 @@ private static void addCvocValue(String val, JsonArrayBuilder vals, JsonObject c } } - public static void injectSettingsService(SettingsServiceBean settingsSvc, DatasetFieldServiceBean datasetFieldSvc) { + //These are used to pick up various settings/constants from the application + public static void injectServices(SettingsServiceBean settingsSvc, DatasetFieldServiceBean datasetFieldSvc, SystemConfig systemCfg) { settingsService = settingsSvc; datasetFieldService = datasetFieldSvc; + systemConfig = systemCfg; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMapHelper.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMapHelper.java index 4d63edac268..cca1e16b4f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMapHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMapHelper.java @@ -2,7 +2,7 @@ import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; - +import edu.harvard.iq.dataverse.util.SystemConfig; import jakarta.annotation.PostConstruct; import jakarta.ejb.EJB; import jakarta.ejb.Singleton; @@ -22,8 +22,11 @@ public class OREMapHelper { @EJB DatasetFieldServiceBean datasetFieldSvc; + @EJB + SystemConfig systemConfig; + @PostConstruct public void injectService() { - OREMap.injectSettingsService(settingsSvc, datasetFieldSvc); + OREMap.injectServices(settingsSvc, datasetFieldSvc, systemConfig); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FileUtilWrapper.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FileUtilWrapper.java index 2bcac04076a..ecb34bdcfb5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FileUtilWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/data/FileUtilWrapper.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; +import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import edu.harvard.iq.dataverse.util.FileUtil; import java.io.File; @@ -43,7 +44,11 @@ public void deleteFile(Path filePath) { } public File saveInputStreamInTempFile(InputStream inputStream, Long fileSizeLimit) throws IOException, FileExceedsMaxSizeException { - return FileUtil.saveInputStreamInTempFile(inputStream, fileSizeLimit); + try { + return FileUtil.saveInputStreamInTempFile(inputStream, fileSizeLimit); + } catch 
(FileExceedsStorageQuotaException fesqx) { + return null; + } } public String determineFileType(File file, String fileName) throws IOException { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/FileExceedsStorageQuotaException.java b/src/main/java/edu/harvard/iq/dataverse/util/file/FileExceedsStorageQuotaException.java new file mode 100644 index 00000000000..29eeca254f7 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/file/FileExceedsStorageQuotaException.java @@ -0,0 +1,22 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package edu.harvard.iq.dataverse.util.file; + +/** + * + * @author landreev + */ +public class FileExceedsStorageQuotaException extends Exception { + + public FileExceedsStorageQuotaException(String message) { + super(message); + } + + public FileExceedsStorageQuotaException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java index 113a6128364..637f002f5ad 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java @@ -39,7 +39,6 @@ import edu.harvard.iq.dataverse.DatasetFieldValue; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.MetadataBlock; import edu.harvard.iq.dataverse.MetadataBlockServiceBean; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; @@ -52,6 +51,8 @@ import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.license.LicenseServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import jakarta.json.JsonReader; public class JSONLDUtil { @@ -82,7 +83,7 @@ public static Dataset updateDatasetMDFromJsonLD(Dataset ds, String jsonLDBody, JsonObject jsonld = decontextualizeJsonLD(jsonLDBody); if (migrating) { - Optional maybePid = GlobalIdServiceBean.parse(jsonld.getString("@id")); + Optional maybePid = PidProvider.parse(jsonld.getString("@id")); if (maybePid.isPresent()) { ds.setGlobalId(maybePid.get()); } else { @@ -533,13 +534,11 @@ public static JsonObject decontextualizeJsonLD(String jsonLDString) { try (StringReader rdr = new StringReader(jsonLDString)) { // Use JsonLd to expand/compact to localContext - JsonObject jsonld = Json.createReader(rdr).readObject(); - JsonDocument doc = JsonDocument.of(jsonld); - JsonArray array = null; - try { - array = JsonLd.expand(doc).get(); - jsonld = JsonLd.compact(JsonDocument.of(array), JsonDocument.of(Json.createObjectBuilder().build())) - .get(); + try (JsonReader jsonReader = Json.createReader(rdr)) { + JsonObject jsonld = jsonReader.readObject(); + JsonDocument doc = JsonDocument.of(jsonld); + JsonArray array = JsonLd.expand(doc).get(); + jsonld = JsonLd.compact(JsonDocument.of(array), JsonDocument.of(Json.createObjectBuilder().build())).get(); // jsonld = array.getJsonObject(0); logger.fine("Decontextualized object: " + jsonld); return jsonld; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index febb785cd95..2141fcc5fca 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -34,7 +34,6 @@ import edu.harvard.iq.dataverse.workflow.step.WorkflowStepData; import org.apache.commons.validator.routines.DomainValidator; -import java.io.StringReader; import java.sql.Timestamp; import java.text.ParseException; import java.util.ArrayList; @@ -53,7 +52,6 @@ import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonObject; -import jakarta.json.JsonReader; import jakarta.json.JsonString; import jakarta.json.JsonValue; import jakarta.json.JsonValue.ValueType; @@ -320,8 +318,8 @@ public DatasetVersion parseDatasetVersion(JsonObject obj) throws JsonParseExcept public Dataset parseDataset(JsonObject obj) throws JsonParseException { Dataset dataset = new Dataset(); - dataset.setAuthority(obj.getString("authority", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : obj.getString("authority")); - dataset.setProtocol(obj.getString("protocol", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : obj.getString("protocol")); + dataset.setAuthority(obj.getString("authority", null)); + dataset.setProtocol(obj.getString("protocol", null)); dataset.setIdentifier(obj.getString("identifier",null)); String mdl = obj.getString("metadataLanguage",null); if(mdl==null || settingsService.getBaseMetadataLanguageMap(new HashMap(), true).containsKey(mdl)) { @@ -682,8 +680,7 @@ private DatasetField remapGeographicCoverage(CompoundVocabularyException ex) thr // convert DTO to datasetField so we can back valid values. Gson gson = new Gson(); String jsonString = gson.toJson(geoCoverageDTO); - JsonReader jsonReader = Json.createReader(new StringReader(jsonString)); - JsonObject obj = jsonReader.readObject(); + JsonObject obj = JsonUtil.getJsonObject(jsonString); DatasetField geoCoverageField = parseField(obj); // add back valid values diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index b6026998bb7..d9f19f1e94e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -1,22 +1,6 @@ package edu.harvard.iq.dataverse.util.json; import edu.harvard.iq.dataverse.*; -import edu.harvard.iq.dataverse.AuxiliaryFile; -import edu.harvard.iq.dataverse.ControlledVocabularyValue; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileTag; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetDistributor; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; -import edu.harvard.iq.dataverse.DatasetFieldValue; -import edu.harvard.iq.dataverse.DatasetLock; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseContact; -import edu.harvard.iq.dataverse.DataverseFacet; -import edu.harvard.iq.dataverse.DataverseTheme; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.groups.impl.maildomain.MailDomainGroup; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUser; @@ -57,7 +41,6 @@ import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObjectBuilder; -import 
jakarta.json.JsonValue; import java.util.function.BiConsumer; import java.util.function.BinaryOperator; @@ -72,6 +55,7 @@ import jakarta.ejb.Singleton; import jakarta.json.JsonArray; import jakarta.json.JsonObject; +import java.math.BigDecimal; /** * Convert objects to Json. @@ -274,11 +258,11 @@ public static JsonObjectBuilder json(Workflow wf){ } public static JsonObjectBuilder json(Dataverse dv) { - return json(dv, false); + return json(dv, false, false); } //TODO: Once we upgrade to Java EE 8 we can remove objects from the builder, and this email removal can be done in a better place. - public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail) { + public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail, Boolean returnOwners) { JsonObjectBuilder bld = jsonObjectBuilder() .add("id", dv.getId()) .add("alias", dv.getAlias()) @@ -287,7 +271,9 @@ public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail) { if(!hideEmail) { bld.add("dataverseContacts", JsonPrinter.json(dv.getDataverseContacts())); } - + if (returnOwners){ + bld.add("isPartOf", getOwnersFromDvObject(dv)); + } bld.add("permissionRoot", dv.isPermissionRoot()) .add("description", dv.getDescription()) .add("dataverseType", dv.getDataverseType().name()); @@ -320,7 +306,57 @@ public static JsonArrayBuilder json(List dataverseContacts) { } return jsonArrayOfContacts; } + + public static JsonObjectBuilder getOwnersFromDvObject(DvObject dvObject){ + return getOwnersFromDvObject(dvObject, null); + } + + public static JsonObjectBuilder getOwnersFromDvObject(DvObject dvObject, DatasetVersion dsv) { + List ownerList = new ArrayList(); + dvObject = dvObject.getOwner(); // We're going to ignore the object itself + //Get "root" to top of list + while (dvObject != null) { + ownerList.add(0, dvObject); + dvObject = dvObject.getOwner(); + } + //then work "inside out" + JsonObjectBuilder saved = null; + for (DvObject dvo : ownerList) { + saved = addEmbeddedOwnerObject(dvo, saved, dsv); + } + return saved; + } + + private static JsonObjectBuilder addEmbeddedOwnerObject(DvObject dvo, JsonObjectBuilder isPartOf, DatasetVersion dsv ) { + JsonObjectBuilder ownerObject = jsonObjectBuilder(); + + if (dvo.isInstanceofDataverse()) { + ownerObject.add("type", "DATAVERSE"); + Dataverse in = (Dataverse) dvo; + ownerObject.add("identifier", in.getAlias()); + } + if (dvo.isInstanceofDataset()) { + ownerObject.add("type", "DATASET"); + if (dvo.getGlobalId() != null) { + ownerObject.add("persistentIdentifier", dvo.getGlobalId().asString()); + } + ownerObject.add("identifier", dvo.getId()); + String versionString = dsv == null ? 
"" : dsv.getFriendlyVersionNumber(); + if (!versionString.isEmpty()){ + ownerObject.add("version", versionString); + } + } + + ownerObject.add("displayName", dvo.getDisplayName()); + + if (isPartOf != null) { + ownerObject.add("isPartOf", isPartOf); + } + + return ownerObject; + } + public static JsonObjectBuilder json( DataverseTheme theme ) { final NullSafeJsonBuilder baseObject = jsonObjectBuilder() .add("id", theme.getId() ) @@ -343,8 +379,12 @@ public static JsonObjectBuilder json(BuiltinUser user) { .add("id", user.getId()) .add("userName", user.getUserName()); } + + public static JsonObjectBuilder json(Dataset ds){ + return json(ds, false); + } - public static JsonObjectBuilder json(Dataset ds) { + public static JsonObjectBuilder json(Dataset ds, Boolean returnOwners) { JsonObjectBuilder bld = jsonObjectBuilder() .add("id", ds.getId()) .add("identifier", ds.getIdentifier()) @@ -357,6 +397,9 @@ public static JsonObjectBuilder json(Dataset ds) { if (DvObjectContainer.isMetadataLanguageSet(ds.getMetadataLanguage())) { bld.add("metadataLanguage", ds.getMetadataLanguage()); } + if (returnOwners){ + bld.add("isPartOf", getOwnersFromDvObject(ds)); + } return bld; } @@ -368,11 +411,11 @@ public static JsonObjectBuilder json(FileDetailsHolder ds) { .add("mime",ds.getMime())); } - public static JsonObjectBuilder json(DatasetVersion dsv) { - return json(dsv, null); + public static JsonObjectBuilder json(DatasetVersion dsv, boolean includeFiles) { + return json(dsv, null, includeFiles, false); } - public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList) { + public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, boolean includeFiles, boolean returnOwners) { Dataset dataset = dsv.getDataset(); JsonObjectBuilder bld = jsonObjectBuilder() .add("id", dsv.getId()).add("datasetId", dataset.getId()) @@ -388,6 +431,7 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List anonymized .add("alternativePersistentId", dataset.getAlternativePersistentIdentifier()) .add("publicationDate", dataset.getPublicationDateFormattedYYYYMMDD()) .add("citationDate", dataset.getCitationDateFormattedYYYYMMDD()); + License license = DatasetUtil.getLicense(dsv); if (license != null) { bld.add("license", jsonLicense(dsv)); @@ -414,8 +458,13 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List anonymized bld.add("metadataBlocks", (anonymizedFieldTypeNamesList != null) ? jsonByBlocks(dsv.getDatasetFields(), anonymizedFieldTypeNamesList) : jsonByBlocks(dsv.getDatasetFields()) - ); - bld.add("files", jsonFileMetadatas(dsv.getFileMetadatas())); + ); + if(returnOwners){ + bld.add("isPartOf", getOwnersFromDvObject(dataset)); + } + if (includeFiles) { + bld.add("files", jsonFileMetadatas(dsv.getFileMetadatas())); + } return bld; } @@ -447,8 +496,8 @@ public static JsonObjectBuilder jsonDataFileList(List dataFiles){ * to the regular `json` method for DatasetVersion? Will anything break? * Unit tests for that method could not be found. 
*/ - public static JsonObjectBuilder jsonWithCitation(DatasetVersion dsv) { - JsonObjectBuilder dsvWithCitation = JsonPrinter.json(dsv); + public static JsonObjectBuilder jsonWithCitation(DatasetVersion dsv, boolean includeFiles) { + JsonObjectBuilder dsvWithCitation = JsonPrinter.json(dsv, includeFiles); dsvWithCitation.add("citation", dsv.getCitation()); return dsvWithCitation; } @@ -467,7 +516,7 @@ public static JsonObjectBuilder jsonWithCitation(DatasetVersion dsv) { */ public static JsonObjectBuilder jsonAsDatasetDto(DatasetVersion dsv) { JsonObjectBuilder datasetDtoAsJson = JsonPrinter.json(dsv.getDataset()); - datasetDtoAsJson.add("datasetVersion", jsonWithCitation(dsv)); + datasetDtoAsJson.add("datasetVersion", jsonWithCitation(dsv, true)); return datasetDtoAsJson; } @@ -575,11 +624,14 @@ public static JsonObjectBuilder json(DatasetFieldType fld) { fieldsBld.add("displayName", fld.getDisplayName()); fieldsBld.add("title", fld.getTitle()); fieldsBld.add("type", fld.getFieldType().toString()); + fieldsBld.add("typeClass", typeClassString(fld)); fieldsBld.add("watermark", fld.getWatermark()); fieldsBld.add("description", fld.getDescription()); fieldsBld.add("multiple", fld.isAllowMultiples()); fieldsBld.add("isControlledVocabulary", fld.isControlledVocabulary()); fieldsBld.add("displayFormat", fld.getDisplayFormat()); + fieldsBld.add("isRequired", fld.isRequired()); + fieldsBld.add("displayOrder", fld.getDisplayOrder()); if (fld.isControlledVocabulary()) { // If the field has a controlled vocabulary, // add all values to the resulting JSON @@ -599,30 +651,41 @@ public static JsonObjectBuilder json(DatasetFieldType fld) { return fieldsBld; } + + public static JsonObjectBuilder json(FileMetadata fmd){ + return json(fmd, false, false); + } + + public static JsonObjectBuilder json(FileMetadata fmd, boolean returnOwners, boolean printDatasetVersion) { + JsonObjectBuilder builder = jsonObjectBuilder(); - public static JsonObjectBuilder json(FileMetadata fmd) { - return jsonObjectBuilder() // deprecated: .add("category", fmd.getCategory()) - // TODO: uh, figure out what to do here... it's deprecated - // in a sense that there's no longer the category field in the - // fileMetadata object; but there are now multiple, oneToMany file + // TODO: uh, figure out what to do here... it's deprecated + // in a sense that there's no longer the category field in the + // fileMetadata object; but there are now multiple, oneToMany file // categories - and we probably need to export them too!) -- L.A. 
4.5 - // DONE: catgegories by name - .add("description", fmd.getDescription()) + // DONE: catgegories by name + builder.add("description", fmd.getDescription()) .add("label", fmd.getLabel()) // "label" is the filename - .add("restricted", fmd.isRestricted()) + .add("restricted", fmd.isRestricted()) .add("directoryLabel", fmd.getDirectoryLabel()) .add("version", fmd.getVersion()) .add("datasetVersionId", fmd.getDatasetVersion().getId()) .add("categories", getFileCategories(fmd)) - .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd, false)); + .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd, false, returnOwners)); + + if (printDatasetVersion) { + builder.add("datasetVersion", json(fmd.getDatasetVersion(), false)); + } + + return builder; } - public static JsonObjectBuilder json(AuxiliaryFile auxFile) { + public static JsonObjectBuilder json(AuxiliaryFile auxFile) { return jsonObjectBuilder() .add("formatTag", auxFile.getFormatTag()) .add("formatVersion", auxFile.getFormatVersion()) // "label" is the filename - .add("origin", auxFile.getOrigin()) + .add("origin", auxFile.getOrigin()) .add("isPublic", auxFile.getIsPublic()) .add("type", auxFile.getType()) .add("contentType", auxFile.getContentType()) @@ -630,11 +693,16 @@ public static JsonObjectBuilder json(AuxiliaryFile auxFile) { .add("checksum", auxFile.getChecksum()) .add("dataFile", JsonPrinter.json(auxFile.getDataFile())); } + public static JsonObjectBuilder json(DataFile df) { return JsonPrinter.json(df, null, false); } - public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider) { + public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider){ + return json(df, fileMetadata, forExportDataProvider, false); + } + + public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider, boolean returnOwners) { // File names are no longer stored in the DataFile entity; // (they are instead in the FileMetadata (as "labels") - this way // the filename can change between versions... @@ -665,6 +733,7 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boo .add("pidURL", pidURL) .add("filename", fileName) .add("contentType", df.getContentType()) + .add("friendlyType", df.getFriendlyType()) .add("filesize", df.getFilesize()) .add("description", fileMetadata.getDescription()) .add("categories", getFileCategories(fileMetadata)) @@ -688,8 +757,14 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boo //--------------------------------------------- .add("md5", getMd5IfItExists(df.getChecksumType(), df.getChecksumValue())) .add("checksum", getChecksumTypeAndValue(df.getChecksumType(), df.getChecksumValue())) + .add("tabularData", df.isTabularData()) .add("tabularTags", getTabularFileTags(df)) - .add("creationDate", df.getCreateDateFormattedYYYYMMDD()); + .add("creationDate", df.getCreateDateFormattedYYYYMMDD()) + .add("publicationDate", df.getPublicationDateFormattedYYYYMMDD()); + Dataset dfOwner = df.getOwner(); + if (dfOwner != null) { + builder.add("fileAccessRequest", dfOwner.isFileAccessRequest()); + } /* * The restricted state was not included prior to #9175 so to avoid backward * incompatability, it is now only added when generating json for the @@ -703,6 +778,9 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boo ? 
JsonPrinter.jsonVarGroup(fileMetadata.getVarGroups()) : null); } + if (returnOwners){ + builder.add("isPartOf", getOwnersFromDvObject(df, fileMetadata.getDatasetVersion())); + } return builder; } @@ -756,7 +834,7 @@ public static JsonObjectBuilder json(DataVariable dv) { .add("variableMetadata",jsonVarMetadata(dv.getVariableMetadatas())) .add("invalidRanges", dv.getInvalidRanges().isEmpty() ? null : JsonPrinter.jsonInvalidRanges(dv.getInvalidRanges())) .add("summaryStatistics", dv.getSummaryStatistics().isEmpty() ? null : JsonPrinter.jsonSumStat(dv.getSummaryStatistics())) - .add("variableCategories", dv.getCategories().isEmpty() ? null : JsonPrinter.jsonCatStat(dv.getCategories())) + .add("variableCategories", dv.getCategories().isEmpty() ? null : JsonPrinter.jsonCatStat(dv.getCategories())) ; } @@ -799,9 +877,10 @@ private static JsonArrayBuilder jsonCatStat(Collection catStat JsonObjectBuilder catStatObj = Json.createObjectBuilder(); catStatObj.add("label", stat.getLabel()) .add("value", stat.getValue()) - .add("isMissing", stat.isMissing()) - .add("frequency", stat.getFrequency()) - ; + .add("isMissing", stat.isMissing()); + if(stat.getFrequency()!=null){ + catStatObj.add("frequency", stat.getFrequency()); + } catArr.add(catStatObj); } return catArr; @@ -1110,6 +1189,30 @@ public Set characteristics() { }; } + public static JsonObjectBuilder json(Map map) { + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); + for (Map.Entry mapEntry : map.entrySet()) { + jsonObjectBuilder.add(mapEntry.getKey(), mapEntry.getValue()); + } + return jsonObjectBuilder; + } + + public static JsonObjectBuilder jsonFileCountPerAccessStatusMap(Map map) { + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); + for (Map.Entry mapEntry : map.entrySet()) { + jsonObjectBuilder.add(mapEntry.getKey().toString(), mapEntry.getValue()); + } + return jsonObjectBuilder; + } + + public static JsonObjectBuilder jsonFileCountPerTabularTagNameMap(Map map) { + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); + for (Map.Entry mapEntry : map.entrySet()) { + jsonObjectBuilder.add(mapEntry.getKey().toString(), mapEntry.getValue()); + } + return jsonObjectBuilder; + } + public static Collector, JsonArrayBuilder> toJsonArray() { return new Collector, JsonArrayBuilder>() { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index 09d02854bab..72a1cd2e1eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -1,9 +1,9 @@ package edu.harvard.iq.dataverse.util.json; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.google.gson.JsonObject; - +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStream; import java.io.StringReader; import java.io.StringWriter; import java.util.HashMap; @@ -11,6 +11,9 @@ import java.util.logging.Logger; import jakarta.json.Json; import jakarta.json.JsonArray; +import jakarta.json.JsonException; +import jakarta.json.JsonObject; +import jakarta.json.JsonReader; import jakarta.json.JsonWriter; import jakarta.json.JsonWriterFactory; import jakarta.json.stream.JsonGenerator; @@ -19,17 +22,19 @@ public class JsonUtil { private static final Logger logger = Logger.getLogger(JsonUtil.class.getCanonicalName()); + private JsonUtil() {} + /** * Make an attempt at pretty printing a String but will 
return the original * string if it isn't JSON or if there is any exception. */ public static String prettyPrint(String jsonString) { try { - com.google.gson.JsonParser jsonParser = new com.google.gson.JsonParser(); - JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject(); - Gson gson = new GsonBuilder().setPrettyPrinting().create(); - String prettyJson = gson.toJson(jsonObject); - return prettyJson; + if (jsonString.trim().startsWith("{")) { + return prettyPrint(getJsonObject(jsonString)); + } else { + return prettyPrint(getJsonArray(jsonString)); + } } catch (Exception ex) { logger.info("Returning original string due to exception: " + ex); return jsonString; @@ -47,7 +52,7 @@ public static String prettyPrint(JsonArray jsonArray) { return stringWriter.toString(); } - public static String prettyPrint(jakarta.json.JsonObject jsonObject) { + public static String prettyPrint(JsonObject jsonObject) { Map config = new HashMap<>(); config.put(JsonGenerator.PRETTY_PRINTING, true); JsonWriterFactory jsonWriterFactory = Json.createWriterFactory(config); @@ -57,16 +62,73 @@ public static String prettyPrint(jakarta.json.JsonObject jsonObject) { } return stringWriter.toString(); } - - public static jakarta.json.JsonObject getJsonObject(String serializedJson) { + + /** + * Return the contents of the string as a JSON object. + * This method closes its resources when an exception occurs, but does + * not catch any exceptions. + * @param serializedJson the JSON object serialized as a {@code String} + * @throws JsonException when parsing fails. + * @see #getJsonObject(InputStream) + * @see #getJsonObjectFromFile(String) + * @see #getJsonArray(String) + */ + public static JsonObject getJsonObject(String serializedJson) { try (StringReader rdr = new StringReader(serializedJson)) { - return Json.createReader(rdr).readObject(); + try (JsonReader jsonReader = Json.createReader(rdr)) { + return jsonReader.readObject(); + } + } + } + + /** + * Return the contents of the {@link InputStream} as a JSON object. + * + * This method closes its resources when an exception occurs, but does + * not catch any exceptions. + * The caller of this method is responsible for closing the provided stream. + * @param stream the input stream to read from + * @throws JsonException when parsing fails. + * @see #getJsonObject(String) + * @see #getJsonObjectFromFile(String) + */ + public static JsonObject getJsonObject(InputStream stream) { + try (JsonReader jsonReader = Json.createReader(stream)) { + return jsonReader.readObject(); } } - - public static jakarta.json.JsonArray getJsonArray(String serializedJson) { + + /** + * Return the contents of the file as a JSON object. + * This method closes its resources when an exception occurs, but does + * not catch any exceptions. + * @param fileName the name of the file to read from + * @throws FileNotFoundException when the file cannot be opened for reading + * @throws JsonException when parsing fails. + * @see #getJsonObject(String) + * @see #getJsonObject(InputStream) + */ + public static JsonObject getJsonObjectFromFile(String fileName) throws IOException { + try (FileReader rdr = new FileReader(fileName)) { + try (JsonReader jsonReader = Json.createReader(rdr)) { + return jsonReader.readObject(); + } + } + } + + /** + * Return the contents of the string as a JSON array. + * This method closes its resources when an exception occurs, but does + * not catch any exceptions. 
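The reworked JsonUtil.getJsonObject above relies on try-with-resources so both the string reader and the JsonReader are closed even when parsing throws, while JsonException is deliberately left for the caller. A small usage-style sketch of the same pattern; the class and method names here are illustrative, not part of the patch:

import jakarta.json.Json;
import jakarta.json.JsonException;
import jakarta.json.JsonObject;
import jakarta.json.JsonReader;
import java.io.StringReader;

public class JsonUtilUsageExample {

    // Same shape as the reworked helper: both readers close automatically,
    // and parse failures surface to the caller as JsonException.
    static JsonObject parse(String serializedJson) {
        try (StringReader rdr = new StringReader(serializedJson);
             JsonReader jsonReader = Json.createReader(rdr)) {
            return jsonReader.readObject();
        }
    }

    public static void main(String[] args) {
        System.out.println(parse("{\"status\":\"OK\"}").getString("status")); // OK
        try {
            parse("not json");
        } catch (JsonException ex) {
            System.out.println("caller handles parse failures: " + ex.getMessage());
        }
    }
}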
+ * @param serializedJson the JSON array serialized as a {@code String} + * @throws JsonException when parsing fails. + * @see #getJsonObject(String) + */ + public static JsonArray getJsonArray(String serializedJson) { try (StringReader rdr = new StringReader(serializedJson)) { - return Json.createReader(rdr).readArray(); + try (JsonReader jsonReader = Json.createReader(rdr)) { + return jsonReader.readArray(); + } } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java index d4d03dbf6d9..18b3eb91689 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java @@ -1,8 +1,10 @@ package edu.harvard.iq.dataverse.workflow; +import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetLock; import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DataverseRequestServiceBean; +import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; import edu.harvard.iq.dataverse.UserNotification; @@ -58,6 +60,9 @@ public class WorkflowServiceBean { @EJB DatasetServiceBean datasets; + + @EJB + DvObjectServiceBean dvObjects; @EJB SettingsServiceBean settings; @@ -387,16 +392,11 @@ private void workflowCompleted(Workflow wf, WorkflowContext ctxt) { //Now lock for FinalizePublication - this block mirrors that in PublishDatasetCommand AuthenticatedUser user = ctxt.getRequest().getAuthenticatedUser(); DatasetLock lock = new DatasetLock(DatasetLock.Reason.finalizePublication, user); - lock.setDataset(ctxt.getDataset()); - String currentGlobalIdProtocol = settings.getValueForKey(SettingsServiceBean.Key.Protocol, ""); - String currentGlobalAuthority= settings.getValueForKey(SettingsServiceBean.Key.Authority, ""); - String dataFilePIDFormat = settings.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); + Dataset dataset = ctxt.getDataset(); + lock.setDataset(dataset); boolean registerGlobalIdsForFiles = - (currentGlobalIdProtocol.equals(ctxt.getDataset().getProtocol()) || dataFilePIDFormat.equals("INDEPENDENT")) - && systemConfig.isFilePIDsEnabledForCollection(ctxt.getDataset().getOwner()); - if ( registerGlobalIdsForFiles ){ - registerGlobalIdsForFiles = currentGlobalAuthority.equals( ctxt.getDataset().getAuthority() ); - } + systemConfig.isFilePIDsEnabledForCollection(ctxt.getDataset().getOwner()) && + dvObjects.getEffectivePidGenerator(dataset).canCreatePidsLike(dataset.getGlobalId()); boolean validatePhysicalFiles = systemConfig.isDatafileValidationOnPublishEnabled(); String info = "Publishing the dataset; "; diff --git a/src/main/java/edu/harvard/iq/dataverse/workflows/WorkflowUtil.java b/src/main/java/edu/harvard/iq/dataverse/workflows/WorkflowUtil.java index 456b829ba61..b104f113db2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflows/WorkflowUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflows/WorkflowUtil.java @@ -3,7 +3,6 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.api.Util; -import java.io.StringReader; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; @@ -11,7 +10,7 @@ import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; - +import edu.harvard.iq.dataverse.util.json.JsonUtil; import 
edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.Success; @@ -42,8 +41,8 @@ public static JsonArrayBuilder getAllWorkflowComments(DatasetVersion datasetVers } public static WorkflowStepResult parseResponse(String externalData) { - try (StringReader reader = new StringReader(externalData)) { - JsonObject response = Json.createReader(reader).readObject(); + try { + JsonObject response = JsonUtil.getJsonObject(externalData); String status = null; //Lower case is documented, upper case is deprecated if(response.containsKey("status")) { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 7d263a14475..f1c18be22df 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -55,6 +55,8 @@ affiliation=Affiliation storage=Storage curationLabels=Curation Labels metadataLanguage=Dataset Metadata Language +guestbookEntryOption=Guestbook Entry Option +pidProviderOption=PID Provider Option createDataverse=Create Dataverse remove=Remove done=Done @@ -63,6 +65,7 @@ manager=Manager curator=Curator explore=Explore download=Download +transfer=Globus Transfer downloadOriginal=Original Format downloadArchival=Archival Format (.tab) deaccession=Deaccession @@ -154,6 +157,7 @@ contact.support=Support contact.from=From contact.from.required=User email is required. contact.from.invalid=Email is invalid. +contact.from.emailPlaceholder=name@email.xyz contact.subject=Subject contact.subject.required=Subject is required. contact.subject.selectTab.top=Select subject... @@ -183,6 +187,7 @@ contact.context.file.intro={0}\n\nYou have just been sent the following message contact.context.file.ending=\n\n---\n\n{0}\n{1}\n\nGo to file {2}/file.xhtml?fileId={3}\n\nYou received this email because you have been listed as a contact for the dataset. If you believe this was an error, please contact {4} at {5}. To respond directly to the individual who sent the message, simply reply to this email. contact.context.support.intro={0},\n\nThe following message was sent from {1}.\n\n---\n\n contact.context.support.ending=\n\n---\n\nMessage sent from Support contact form. +contact.sent=Message sent. # dataverseuser.xhtml account.info=Account Information @@ -207,6 +212,7 @@ notification.welcome=Welcome to {0}! Get started by adding or finding data. Have notification.welcomeConfirmEmail=Also, check for your welcome email to verify your address. notification.demoSite=Demo Site notification.requestFileAccess=File access requested for dataset: {0} was made by {1} ({2}). +notification.requestedFileAccess=You have requested access to files in dataset: {0}. notification.grantFileAccess=Access granted for files in dataset: {0}. notification.rejectFileAccess=Access rejected for requested files in dataset: {0}. notification.createDataverse={0} was created in {1} . To learn more about what you can do with your dataverse, check out the {2}. 
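The bundle strings above are java.text.MessageFormat patterns: {0}, {1}, ... are positional arguments filled in via BundleUtil, and the harvestserver.* counters a little further down use the {n, choice, ...} plural form. A quick illustration of how such patterns expand; the literal patterns below are shortened stand-ins, not copied verbatim from the bundle:

import java.text.MessageFormat;

public class BundleFormatExample {

    public static void main(String[] args) {
        // Simple positional placeholders, as in the notification.* subject lines.
        String requestSubject = MessageFormat.format(
                "{0}: Access has been requested for a restricted file in dataset \"{1}\"",
                "Demo Dataverse", "My Dataset");
        System.out.println(requestSubject);

        // The {n, choice, ...} form picks a word based on the numeric argument.
        String records = MessageFormat.format(
                "{0} {0, choice, 0#records|1#record|2#records} marked as deleted", 2);
        System.out.println(records); // "2 records marked as deleted"
    }
}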
@@ -627,7 +633,7 @@ harvestserver.tab.header.description=Description harvestserver.tab.header.definition=Definition Query harvestserver.tab.col.definition.default=All Published Local Datasets harvestserver.tab.header.stats=Datasets -harvestserver.tab.col.stats.empty=No records (empty set) +harvestserver.tab.col.stats.empty=No active records ({2} {2, choice, 0#records|1#record|2#records} marked as deleted) harvestserver.tab.col.stats.results={0} {0, choice, 0#datasets|1#dataset|2#datasets} ({1} {1, choice, 0#records|1#record|2#records} exported, {2} marked as deleted) harvestserver.tab.header.action=Actions harvestserver.tab.header.action.btn.export=Run Export @@ -745,7 +751,8 @@ dashboard.card.datamove.dataset.command.error.indexingProblem=Dataset could not notification.email.create.dataverse.subject={0}: Your dataverse has been created notification.email.create.dataset.subject={0}: Dataset "{1}" has been created notification.email.dataset.created.subject={0}: Dataset "{1}" has been created -notification.email.request.file.access.subject={0}: Access has been requested for a restricted file +notification.email.request.file.access.subject={0}: Access has been requested for a restricted file in dataset "{1}" +notification.email.requested.file.access.subject={0}: You have requested access to a restricted file in dataset "{1}" notification.email.grant.file.access.subject={0}: You have been granted access to a restricted file notification.email.rejected.file.access.subject={0}: Your request for access to a restricted file has been rejected notification.email.submit.dataset.subject={0}: Dataset "{1}" has been submitted for review @@ -771,16 +778,19 @@ notification.email.greeting.html=Hello,
          notification.email.welcome=Welcome to {0}! Get started by adding or finding data. Have questions? Check out the User Guide at {1}/{2}/user or contact {3} at {4} for assistance. notification.email.welcomeConfirmEmailAddOn=\n\nPlease verify your email address at {0} . Note, the verify link will expire after {1}. Send another verification email by visiting your account page. notification.email.requestFileAccess=File access requested for dataset: {0} by {1} ({2}). Manage permissions at {3}. +notification.email.requestFileAccess.guestbookResponse=

          Guestbook Response:

          {0} notification.email.grantFileAccess=Access granted for files in dataset: {0} (view at {1} ). notification.email.rejectFileAccess=Your request for access was rejected for the requested files in the dataset: {0} (view at {1} ). If you have any questions about why your request was rejected, you may reach the dataset owner using the "Contact" link on the upper right corner of the dataset page. # Bundle file editors, please note that "notification.email.createDataverse" is used in a unit test notification.email.createDataverse=Your new dataverse named {0} (view at {1} ) was created in {2} (view at {3} ). To learn more about what you can do with your dataverse, check out the Dataverse Management - User Guide at {4}/{5}/user/dataverse-management.html . # Bundle file editors, please note that "notification.email.createDataset" is used in a unit test notification.email.createDataset=Your new dataset named {0} (view at {1} ) was created in {2} (view at {3} ). To learn more about what you can do with a dataset, check out the Dataset Management - User Guide at {4}/{5}/user/dataset-management.html . -notification.email.wasSubmittedForReview={0} (view at {1} ) was submitted for review to be published in {2} (view at {3} ). Don''t forget to publish it or send it back to the contributor, {4} ({5})\! -notification.email.wasReturnedByReviewer={0} (view at {1} ) was returned by the curator of {2} (view at {3} ). -notification.email.wasPublished={0} (view at {1} ) was published in {2} (view at {3} ). -notification.email.publishFailedPidReg={0} (view at {1} ) in {2} (view at {3} ) could not be published due to a failure to register, or update the Global Identifier for the dataset or one of the files in it. Contact support if this continues to happen. +notification.email.wasSubmittedForReview=Your dataset named {0} (view at {1} ) was submitted for review to be published in {2} (view at {3} ). Don''t forget to publish it or send it back to the contributor, {4} ({5})\! +notification.email.wasReturnedByReviewer=Your dataset named {0} (view at {1} ) was returned by the curator of {2} (view at {3} ). +notification.email.wasReturnedByReviewerReason=Here is the curator comment: {0} +notification.email.wasReturnedByReviewer.collectionContacts=You may contact the collection administrator for more information: {0} +notification.email.wasPublished=Your dataset named {0} (view at {1} ) was published in {2} (view at {3} ). +notification.email.publishFailedPidReg=Your dataset named {0} (view at {1} ) in {2} (view at {3} ) could not be published due to a failure to register, or update the Global Identifier for the dataset or one of the files in it. Contact support if this continues to happen. notification.email.closing=\n\nYou may contact us for support at {0}.\n\nThank you,\n{1} notification.email.closing.html=

          You may contact us for support at {0}.

          Thank you,
          {1} notification.email.assignRole=You are now {0} for the {1} "{2}" (view at {3} ). @@ -789,6 +799,7 @@ notification.email.changeEmail=Hello, {0}.{1}\n\nPlease contact us if you did no notification.email.passwordReset=Hi {0},\n\nSomeone, hopefully you, requested a password reset for {1}.\n\nPlease click the link below to reset your Dataverse account password:\n\n {2} \n\n The link above will only work for the next {3} minutes.\n\n Please contact us if you did not request this password reset or need further help. notification.email.passwordReset.subject=Dataverse Password Reset Requested notification.email.datasetWasCreated=Dataset "{1}" was just created by {2} in the {3} collection. +notification.email.requestedFileAccess=You have requested access to a file(s) in dataset "{1}". Your request has been sent to the managers of this dataset who will grant or reject your request. If you have any questions, you may reach the dataset managers using the "Contact" link on the upper right corner of the dataset page. hours=hours hour=hour minutes=minutes @@ -823,6 +834,8 @@ dataverse.curationLabels.title=A set of curation status labels that are used to dataverse.curationLabels.disabled=Disabled dataverse.category=Category dataverse.category.title=The type that most closely reflects this dataverse. +dataverse.guestbookentryatrequest.title=Whether Guestbooks are displayed to users when they request file access or when they download files. +dataverse.pidProvider.title=The source of PIDs (DOIs, Handles, etc.) when a new PID is created. dataverse.type.selectTab.top=Select one... dataverse.type.selectTab.researchers=Researcher dataverse.type.selectTab.researchProjects=Research Project @@ -867,7 +880,7 @@ dataverse.option.deleteDataverse=Delete Dataverse dataverse.publish.btn=Publish dataverse.publish.header=Publish Dataverse dataverse.nopublished=No Published Dataverses -dataverse.nopublished.tip=In order to use this feature you must have at least one published dataverse. +dataverse.nopublished.tip=In order to use this feature you must have at least one published or linked dataverse. dataverse.contact=Email Dataverse Contact dataverse.link=Link Dataverse dataverse.link.btn.tip=Link to Your Dataverse @@ -916,10 +929,18 @@ dataverse.update.failure=This dataverse was not able to be updated. dataverse.selected=Selected dataverse.listing.error=Fatal error trying to list the contents of the dataverse. Please report this error to the Dataverse administrator. dataverse.datasize=Total size of the files stored in this dataverse: {0} bytes +dataverse.storage.quota.allocation=Total quota allocation for this collection: {0} bytes +dataverse.storage.quota.notdefined=No quota defined for this collection +dataverse.storage.quota.updated=Storage quota successfully set for the collection +dataverse.storage.quota.deleted=Storage quota successfully disabled for the collection +dataverse.storage.quota.superusersonly=Only superusers can change storage quotas. +dataverse.storage.use=Total recorded size of the files stored in this collection (user-uploaded files plus the versions in the archival tab-delimited format when applicable): {0} bytes dataverse.datasize.ioerror=Fatal IO error while trying to determine the total size of the files stored in the dataverse. Please report this error to the Dataverse administrator. 
dataverse.inherited=(inherited from enclosing Dataverse) dataverse.default=(Default) dataverse.metadatalanguage.setatdatasetcreation=Chosen at Dataset Creation +dataverse.guestbookentry.atdownload=Guestbook Entry At Download +dataverse.guestbookentry.atrequest=Guestbook Entry At Access Request # rolesAndPermissionsFragment.xhtml # advanced.xhtml @@ -999,6 +1020,9 @@ dataverse.results.btn.sort.option.relevance=Relevance dataverse.results.cards.foundInMetadata=Found in Metadata Fields: dataverse.results.cards.files.tabularData=Tabular Data dataverse.results.solrIsDown=Please note: Due to an internal error, browsing and searching is not available. +dataverse.results.solrIsTemporarilyUnavailable=Search Engine service (Solr) is temporarily unavailable because of high load. Please try again later. +dataverse.results.solrIsTemporarilyUnavailable.extraText=Note that all the datasets that are part of this collection are accessible via direct links and registered DOIs. +dataverse.results.solrFacetsDisabled=Facets temporarily unavailable. dataverse.theme.title=Theme dataverse.theme.inheritCustomization.title=For this dataverse, use the same theme as the parent dataverse. dataverse.theme.inheritCustomization.label=Inherit Theme @@ -1361,6 +1385,16 @@ dataset.guestbookResponse.guestbook.additionalQuestions=Additional Questions dataset.guestbookResponse.showPreview.errorMessage=Can't show preview. dataset.guestbookResponse.showPreview.errorDetail=Couldn't write guestbook response. +#GuestbookResponse +dataset.guestbookResponse=Guestbook Response +dataset.guestbookResponse.id=Guestbook Response ID +dataset.guestbookResponse.date=Response Date +dataset.guestbookResponse.respondent=Respondent +dataset.guestbookResponse.question=Q +dataset.guestbookResponse.answer=A +dataset.guestbookResponse.noResponse=(No Response) + + # dataset.xhtml dataset.configureBtn=Configure dataset.pageTitle=Add New Dataset @@ -1368,8 +1402,10 @@ dataset.pageTitle=Add New Dataset dataset.accessBtn=Access Dataset dataset.accessBtn.header.download=Download Options dataset.accessBtn.header.explore=Explore Options +dataset.accessBtn.header.configure=Configure Options dataset.accessBtn.header.compute=Compute Options dataset.accessBtn.download.size=ZIP ({0}) +dataset.accessBtn.transfer.size=({0}) dataset.accessBtn.too.big=The dataset is too large to download. Please select the files you need from the files table. dataset.accessBtn.original.too.big=The dataset is too large to download in the original format. Please select the files you need from the files table. dataset.accessBtn.archival.too.big=The dataset is too large to download in the archival format. Please select the files you need from the files table. @@ -1443,7 +1479,7 @@ dataset.submit.failure.inReview=You cannot submit this dataset for review becaus dataset.status.failure.notallowed=Status update failed - label not allowed dataset.status.failure.disabled=Status labeling disabled for this dataset dataset.status.failure.isReleased=Latest version of dataset is already released. Status can only be set on draft versions -dataset.rejectMessage=Return this dataset to contributor for modification. +dataset.rejectMessage=Return this dataset to contributor for modification. The reason for return entered below will be sent by email to the author. dataset.rejectMessage.label=Return to Author Reason dataset.rejectWatermark=Please enter a reason for returning this dataset to its author(s). dataset.reject.enterReason.error=Reason for return to author is required. 
@@ -1451,6 +1487,7 @@ dataset.reject.success=This dataset has been sent back to the contributor. dataset.reject.failure=Dataset Submission Return Failed - {0} dataset.reject.datasetNull=Cannot return the dataset to the author(s) because it is null. dataset.reject.datasetNotInReview=This dataset cannot be return to the author(s) because the latest version is not In Review. The author(s) needs to click Submit for Review first. +dataset.reject.commentNull=You must enter a reason for returning a dataset to the author(s). dataset.publish.tip=Are you sure you want to publish this dataset? Once you do so it must remain published. dataset.publish.terms.tip=This version of the dataset will be published with the following terms: dataset.publish.terms.help.tip=To change the terms for this version, click the Cancel button and go to the Terms tab for this dataset. @@ -1605,6 +1642,7 @@ dataset.metadata.alternativePersistentId.tip=A previously used persistent identi dataset.metadata.invalidEntry=is not a valid entry. dataset.metadata.invalidDate=is not a valid date. "yyyy" is a supported format. dataset.metadata.invalidNumber=is not a valid number. +dataset.metadata.invalidGeospatialCoordinates=has invalid coordinates. East must be greater than West and North must be greater than South. Missing values are NOT allowed. dataset.metadata.invalidInteger=is not a valid integer. dataset.metadata.invalidURL=is not a valid URL. dataset.metadata.invalidEmail=is not a valid email address. @@ -1633,8 +1671,10 @@ dataset.inValidSelectedFilesForDownloadWithEmbargo=Embargoed and/or Restricted F dataset.noValidSelectedFilesForDownload=The selected file(s) may not be downloaded because you have not been granted access. dataset.mixedSelectedFilesForDownload=The restricted file(s) selected may not be downloaded because you have not been granted access. dataset.mixedSelectedFilesForDownloadWithEmbargo=The embargoed and/or restricted file(s) selected may not be downloaded because you have not been granted access. - +dataset.mixedSelectedFilesForTransfer=Some file(s) cannot be transferred. (They are restricted, embargoed, or not Globus accessible.) +dataset.inValidSelectedFilesForTransfer=Ineligible Files Selected dataset.downloadUnrestricted=Click Continue to download the files you have access to download. +dataset.transferUnrestricted=Click Continue to transfer the eligible files. dataset.requestAccessToRestrictedFiles=You may request access to the restricted file(s) by clicking the Request Access button. dataset.requestAccessToRestrictedFilesWithEmbargo=Embargoed files cannot be accessed during the embargo period. If your selection contains restricted files, you may request access to them by clicking the Request Access button. @@ -1678,7 +1718,8 @@ file.select.tooltip=Select Files file.selectAllFiles=Select all {0} files in this dataset. file.dynamicCounter.filesPerPage=Files Per Page file.selectToAddBtn=Select Files to Add -file.selectToAdd.tipLimit=File upload limit is {0} per file. +file.selectToAdd.tipLimit=File upload limit is {0} per file. +file.selectToAdd.tipQuotaRemaining=Storage quota: {0} remaining. file.selectToAdd.tipMaxNumFiles=Maximum of {0} {0, choice, 0#files|1#file|2#files} per upload. file.selectToAdd.tipTabularLimit=Tabular file ingest is limited to {2}. file.selectToAdd.tipPerFileTabularLimit=Ingest is limited to the following file sizes based on their format: {0}.
@@ -1967,7 +2008,8 @@ file.deleteFileDialog.immediate=The file will be deleted after you click on the file.deleteFileDialog.multiple.immediate=The file(s) will be deleted after you click on the Delete button. file.deleteFileDialog.header=Delete Files file.deleteFileDialog.failed.tip=Files will not be removed from previously published versions of the dataset. -file.deaccessionDialog.tip=Once you deaccession this dataset it will no longer be viewable by the public. +file.deaccessionDialog.tip.permanent=Deaccession is permanent. +file.deaccessionDialog.tip=This dataset will no longer be public and a tombstone will display the reason for deaccessioning.
Please read the documentation if you have any questions. file.deaccessionDialog.version=Version file.deaccessionDialog.reason.question1=Which version(s) do you want to deaccession? file.deaccessionDialog.reason.question2=What is the reason for deaccession? @@ -1981,8 +2023,8 @@ file.deaccessionDialog.reason.selectItem.other=Other (Please type reason in spac file.deaccessionDialog.enterInfo=Please enter additional information about the reason for deaccession. file.deaccessionDialog.leaveURL=If applicable, please leave a URL where this dataset can be accessed after deaccessioning. file.deaccessionDialog.leaveURL.watermark=Optional dataset site, http://... -file.deaccessionDialog.deaccession.tip=Are you sure you want to deaccession? The selected version(s) will no longer be viewable by the public. -file.deaccessionDialog.deaccessionDataset.tip=Are you sure you want to deaccession this dataset? It will no longer be viewable by the public. +file.deaccessionDialog.deaccession.tip=Are you sure you want to deaccession? This is permanent and the selected version(s) will no longer be viewable by the public. +file.deaccessionDialog.deaccessionDataset.tip=Are you sure you want to deaccession this dataset? This is permanent and it will no longer be viewable by the public. file.deaccessionDialog.dialog.selectVersion.error=Please select version(s) for deaccessioning. file.deaccessionDialog.dialog.reason.error=Please select reason for deaccessioning. file.deaccessionDialog.dialog.url.error=Please enter valid forwarding URL. @@ -2168,6 +2210,15 @@ ingest.csv.lineMismatch=Mismatch between line counts in first and final passes!, ingest.csv.recordMismatch=Reading mismatch, line {0} of the Data file: {1} delimited values expected, {2} found. ingest.csv.nullStream=Stream can't be null. +file.ingest=Ingest +file.uningest=Uningest +file.ingest.alreadyIngestedWarning=This file has already been ingested +file.ingest.ingestInProgressWarning=Ingestion of this file is already in progress +file.ingest.cantIngestFileWarning=Ingest not supported for this file type +file.ingest.ingestQueued=Ingestion has been requested +file.ingest.cantUningestFileWarning=This file cannot be uningested +file.uningest.complete=Uningestion of this file has been completed + # editdatafile.xhtml # editFilesFragment.xhtml @@ -2186,6 +2237,8 @@ file.message.replaceSuccess=The file has been replaced. file.addreplace.file_size_ok=File size is in range. file.addreplace.error.byte_abrev=B file.addreplace.error.file_exceeds_limit=This file size ({0}) exceeds the size limit of {1}. +file.addreplace.error.quota_exceeded=This file (size {0}) exceeds the remaining storage quota of {1}. +file.addreplace.error.unzipped.quota_exceeded=Unzipped files exceed the remaining storage quota of {0}. file.addreplace.error.dataset_is_null=The dataset cannot be null. file.addreplace.error.dataset_id_is_null=The dataset ID cannot be null. file.addreplace.error.parsing=Error in parsing provided json @@ -2375,10 +2428,6 @@ api.prov.error.freeformMissingJsonKey=The JSON object you send must have a key c api.prov.error.freeformNoText=No provenance free form text available for this file. api.prov.error.noDataFileFound=Could not find a file based on ID. -bagit.sourceOrganization=Dataverse Installation () -bagit.sourceOrganizationAddress= -bagit.sourceOrganizationEmail= - bagit.checksum.validation.error=Invalid checksum for file "{0}".
Manifest checksum={2}, calculated checksum={3}, type={1} bagit.checksum.validation.exception=Error while calculating checksum for file "{0}". Checksum type={1}, error={2} bagit.validation.bag.file.not.found=Invalid BagIt package: "{0}" @@ -2494,6 +2543,7 @@ dataset.registered=DatasetRegistered dataset.registered.msg=Your dataset is now registered. dataset.notlinked=DatasetNotLinked dataset.notlinked.msg=There was a problem linking this dataset to yours: +dataset.linking.popop.already.linked.note=Note: This dataset is already linked to the following dataverse(s): datasetversion.archive.success=Archival copy of Version successfully submitted datasetversion.archive.failure=Error in submitting an archival copy datasetversion.update.failure=Dataset Version Update failed. Changes are still in the DRAFT version. @@ -2616,6 +2666,10 @@ admin.api.deleteUser.success=Authenticated User {0} deleted. #Files.java files.api.metadata.update.duplicateFile=Filename already exists at {0} files.api.no.draft=No draft available for this file +files.api.no.draftOrUnauth=Dataset version cannot be found or unauthorized. +files.api.notFoundInVersion="File metadata for file with id {0} in dataset version {1} not found" +files.api.only.tabular.supported=This operation is only available for tabular files. +files.api.fileNotFound=File could not be found. #Datasets.java datasets.api.updatePIDMetadata.failure.dataset.must.be.released=Modify Registration Metadata must be run on a published dataset. @@ -2641,7 +2695,13 @@ datasets.api.privateurl.anonymized.error.released=Can't create a URL for anonymi datasets.api.creationdate=Date Created datasets.api.modificationdate=Last Modified Date datasets.api.curationstatus=Curation Status - +datasets.api.version.files.invalid.order.criteria=Invalid order criteria: {0} +datasets.api.version.files.invalid.access.status=Invalid access status: {0} +datasets.api.deaccessionDataset.invalid.version.identifier.error=Only {0} or a specific version can be deaccessioned +datasets.api.deaccessionDataset.invalid.forward.url=Invalid deaccession forward URL: {0} +datasets.api.globusdownloaddisabled=File transfer from Dataverse via Globus is not available for this dataset. +datasets.api.globusdownloadnotfound=List of files to transfer not found. +datasets.api.globusuploaddisabled=File transfer to Dataverse via Globus is not available for this dataset. #Dataverses.java dataverses.api.update.default.contributor.role.failure.role.not.found=Role {0} not found. @@ -2661,6 +2721,9 @@ dataverses.api.move.dataverse.error.forceMove=Please use the parameter ?forceMov dataverses.api.create.dataset.error.mustIncludeVersion=Please provide initial version in the dataset json dataverses.api.create.dataset.error.superuserFiles=Only a superuser may add files via this api dataverses.api.create.dataset.error.mustIncludeAuthorName=Please provide author name in the dataset json +dataverses.api.validate.json.succeeded=The Dataset JSON provided is valid for this Dataverse Collection. 
+dataverses.api.validate.json.failed=The Dataset JSON provided failed validation with the following error: +dataverses.api.validate.json.exception=Validation failed with following exception: #Access.java access.api.allowRequests.failure.noDataset=Could not find Dataset with id: {0} diff --git a/src/main/java/propertyFiles/License.properties b/src/main/java/propertyFiles/License.properties index 6ded8c41d5b..6602f408a12 100644 --- a/src/main/java/propertyFiles/License.properties +++ b/src/main/java/propertyFiles/License.properties @@ -2,3 +2,5 @@ license.cc0_1.0.description=Creative Commons CC0 1.0 Universal Public Domain Ded license.cc_by_4.0.description=Creative Commons Attribution 4.0 International License. license.cc0_1.0.name=CC0 1.0 license.cc_by_4.0.name=CC BY 4.0 +license.custom_terms.name=Custom Terms + diff --git a/src/main/java/propertyFiles/geospatial.properties b/src/main/java/propertyFiles/geospatial.properties index 04db8d3d05f..ce258071c27 100644 --- a/src/main/java/propertyFiles/geospatial.properties +++ b/src/main/java/propertyFiles/geospatial.properties @@ -8,10 +8,10 @@ datasetfieldtype.city.title=City datasetfieldtype.otherGeographicCoverage.title=Other datasetfieldtype.geographicUnit.title=Geographic Unit datasetfieldtype.geographicBoundingBox.title=Geographic Bounding Box -datasetfieldtype.westLongitude.title=West Longitude -datasetfieldtype.eastLongitude.title=East Longitude -datasetfieldtype.northLongitude.title=North Latitude -datasetfieldtype.southLongitude.title=South Latitude +datasetfieldtype.westLongitude.title=Westernmost (Left) Longitude +datasetfieldtype.eastLongitude.title=Easternmost (Right) Longitude +datasetfieldtype.northLatitude.title=Northernmost (Top) Latitude +datasetfieldtype.southLatitude.title=Southernmost (Bottom) Latitude datasetfieldtype.geographicCoverage.description=Information on the geographic coverage of the data. Includes the total geographic scope of the data. datasetfieldtype.country.description=The country or nation that the Dataset is about. datasetfieldtype.state.description=The state or province that the Dataset is about. Use GeoNames for correct spelling and avoid abbreviations. @@ -21,8 +21,8 @@ datasetfieldtype.geographicUnit.description=Lowest level of geographic aggregati datasetfieldtype.geographicBoundingBox.description=The fundamental geometric description for any Dataset that models geography is the geographic bounding box. It describes the minimum box, defined by west and east longitudes and north and south latitudes, which includes the largest geographic extent of the Dataset's geographic coverage. This element is used in the first pass of a coordinate-based search. Inclusion of this element in the codebook is recommended, but is required if the bound polygon box is included. datasetfieldtype.westLongitude.description=Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. datasetfieldtype.eastLongitude.description=Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. -datasetfieldtype.northLongitude.description=Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. 
-datasetfieldtype.southLongitude.description=Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. +datasetfieldtype.northLatitude.description=Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. +datasetfieldtype.southLatitude.description=Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. datasetfieldtype.geographicCoverage.watermark= datasetfieldtype.country.watermark= datasetfieldtype.state.watermark= @@ -32,8 +32,8 @@ datasetfieldtype.geographicUnit.watermark= datasetfieldtype.geographicBoundingBox.watermark= datasetfieldtype.westLongitude.watermark= datasetfieldtype.eastLongitude.watermark= -datasetfieldtype.northLongitude.watermark= -datasetfieldtype.southLongitude.watermark= +datasetfieldtype.northLatitude.watermark= +datasetfieldtype.southLatitude.watermark= controlledvocabulary.country.afghanistan=Afghanistan controlledvocabulary.country.albania=Albania controlledvocabulary.country.algeria=Algeria @@ -89,10 +89,10 @@ controlledvocabulary.country.cook_islands=Cook Islands controlledvocabulary.country.costa_rica=Costa Rica controlledvocabulary.country.croatia=Croatia controlledvocabulary.country.cuba=Cuba -controlledvocabulary.country.curacao=Curaçao +controlledvocabulary.country.curacao=Cura\u00e7ao controlledvocabulary.country.cyprus=Cyprus controlledvocabulary.country.czech_republic=Czech Republic -controlledvocabulary.country.cote_d'ivoire=Côte d'Ivoire +controlledvocabulary.country.cote_d'ivoire=C\u00f4te d'Ivoire controlledvocabulary.country.denmark=Denmark controlledvocabulary.country.djibouti=Djibouti controlledvocabulary.country.dominica=Dominica @@ -216,8 +216,8 @@ controlledvocabulary.country.qatar=Qatar controlledvocabulary.country.romania=Romania controlledvocabulary.country.russian_federation=Russian Federation controlledvocabulary.country.rwanda=Rwanda -controlledvocabulary.country.reunion=Réunion -controlledvocabulary.country.saint_barthelemy=Saint Barthélemy +controlledvocabulary.country.reunion=R\u00e9union +controlledvocabulary.country.saint_barthelemy=Saint Barth\u00e9lemy controlledvocabulary.country.saint_helena,_ascension_and_tristan_da_cunha=Saint Helena, Ascension and Tristan da Cunha controlledvocabulary.country.saint_kitts_and_nevis=Saint Kitts and Nevis controlledvocabulary.country.saint_lucia=Saint Lucia @@ -282,4 +282,4 @@ controlledvocabulary.country.western_sahara=Western Sahara controlledvocabulary.country.yemen=Yemen controlledvocabulary.country.zambia=Zambia controlledvocabulary.country.zimbabwe=Zimbabwe -controlledvocabulary.country.aland_islands=Åland Islands +controlledvocabulary.country.aland_islands=\u00c5land Islands diff --git a/src/main/java/propertyFiles/staticSearchFields.properties b/src/main/java/propertyFiles/staticSearchFields.properties index ab03de64f23..53d0080b87c 100644 --- a/src/main/java/propertyFiles/staticSearchFields.properties +++ b/src/main/java/propertyFiles/staticSearchFields.properties @@ -3,6 +3,7 @@ staticSearchFields.metadata_type_ss=Dataset Feature staticSearchFields.dvCategory=Dataverse Category staticSearchFields.metadataSource=Metadata Source staticSearchFields.publicationDate=Publication Year +staticSearchFields.license=License 
staticSearchFields.fileTypeGroupFacet=File Type staticSearchFields.dvObjectType=Type staticSearchFields.fileTag=File Tag diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 7c16495f870..56c0fe0ff0a 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -9,10 +9,16 @@ dataverse.build= %ct.dataverse.siteUrl=http://${dataverse.fqdn}:8080 # FILES -dataverse.files.directory=/tmp/dataverse -# The variables are replaced with the environment variables from our base image, but still easy to override -%ct.dataverse.files.directory=${STORAGE_DIR} -%ct.dataverse.files.uploads=${STORAGE_DIR}/uploads +# NOTE: The following uses STORAGE_DIR for both containers and classic installations. When defaulting to +# "com.sun.aas.instanceRoot" if not present, it equals the hardcoded default "." in glassfish-web.xml +# (which is relative to the domain root folder). +# Also, be aware that this props file cannot provide any value for lookups in glassfish-web.xml during servlet +# initialization, as this file will not have been read yet! The names and their values are in sync here and over +# there to ensure the config checker is able to check for the directories (exist + writeable). +dataverse.files.directory=${STORAGE_DIR:/tmp/dataverse} +dataverse.files.uploads=${STORAGE_DIR:${com.sun.aas.instanceRoot}}/uploads +dataverse.files.docroot=${STORAGE_DIR:${com.sun.aas.instanceRoot}}/docroot +dataverse.files.globus-cache-maxage=5 # SEARCH INDEX dataverse.solr.host=localhost @@ -44,14 +50,6 @@ dataverse.oai.server.maxsets=100 # can be customized via the setting below: #dataverse.oai.server.repositoryname= -# PERSISTENT IDENTIFIER PROVIDERS -# EZID -dataverse.pid.ezid.api-url=https://ezid.cdlib.org - -# DataCite -dataverse.pid.datacite.mds-api-url=https://mds.test.datacite.org -dataverse.pid.datacite.rest-api-url=https://api.test.datacite.org - -# Handle.Net -dataverse.pid.handlenet.index=300 - +# AUTHENTICATION +dataverse.auth.oidc.pkce.max-cache-size=10000 +dataverse.auth.oidc.pkce.max-cache-age=300 diff --git a/src/main/resources/db/migration/V6.0.0.1__9599-guestbook-at-request.sql b/src/main/resources/db/migration/V6.0.0.1__9599-guestbook-at-request.sql new file mode 100644 index 00000000000..c90ee4a5329 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.1__9599-guestbook-at-request.sql @@ -0,0 +1,63 @@ +ALTER TABLE fileaccessrequests ADD COLUMN IF NOT EXISTS request_state VARCHAR(64); +ALTER TABLE fileaccessrequests ADD COLUMN IF NOT EXISTS id SERIAL; +ALTER TABLE fileaccessrequests DROP CONSTRAINT IF EXISTS fileaccessrequests_pkey; +ALTER TABLE fileaccessrequests ADD CONSTRAINT fileaccessrequests_pkey PRIMARY KEY (id); +ALTER TABLE fileaccessrequests ADD COLUMN IF NOT EXISTS guestbookresponse_id INT; +ALTER TABLE fileaccessrequests DROP CONSTRAINT IF EXISTS fk_fileaccessrequests_guestbookresponse; +ALTER TABLE fileaccessrequests ADD CONSTRAINT fk_fileaccessrequests_guestbookresponse FOREIGN KEY (guestbookresponse_id) REFERENCES guestbookresponse(id); +DROP INDEX IF EXISTS created_requests; +CREATE UNIQUE INDEX created_requests ON fileaccessrequests (datafile_id, authenticated_user_id) WHERE request_state='CREATED'; + +ALTER TABLE dataverse ADD COLUMN IF NOT EXISTS guestbookatrequest bool; +ALTER TABLE dataset ADD COLUMN IF NOT EXISTS guestbookatrequest bool; + +ALTER TABLE guestbookresponse ADD COLUMN IF NOT EXISTS eventtype VARCHAR(255); 
+ALTER TABLE guestbookresponse ADD COLUMN IF NOT EXISTS sessionid VARCHAR(255); + +DO $$ + BEGIN + IF EXISTS (select 1 from pg_class where relname='filedownload') THEN + + UPDATE guestbookresponse g + SET eventtype = (SELECT downloadtype FROM filedownload f where f.guestbookresponse_id = g.id), + sessionid = (SELECT sessionid FROM filedownload f where f.guestbookresponse_id=g.id); + DROP TABLE filedownload; + END IF; + END + $$ ; + + +-- This creates a function that ESTIMATES the size of the +-- GuestbookResponse table (for the metrics display), instead +-- of relying on straight "SELECT COUNT(*) ..." +-- It uses statistics to estimate the number of guestbook entries +-- and the fraction of them related to downloads, +-- i.e. those that weren't created for 'AccessRequest' events. +-- Significant potential savings for an active installation. +-- See https://github.com/IQSS/dataverse/issues/8840 and +-- https://github.com/IQSS/dataverse/pull/8972 for more details + +CREATE OR REPLACE FUNCTION estimateGuestBookResponseTableSize() +RETURNS bigint AS $$ +DECLARE + estimatedsize bigint; +BEGIN + SELECT CASE WHEN relpages<10 THEN 0 + ELSE ((reltuples / relpages) + * (pg_relation_size('public.guestbookresponse') / current_setting('block_size')::int))::bigint + * (SELECT CASE WHEN ((select count(*) from pg_stats where tablename='guestbookresponse') = 0 + OR (select array_position(most_common_vals::text::text[], 'AccessRequest') + FROM pg_stats WHERE tablename='guestbookresponse' AND attname='eventtype') IS NULL) THEN 1 + ELSE 1 - (SELECT (most_common_freqs::text::text[])[array_position(most_common_vals::text::text[], 'AccessRequest')]::bigint + FROM pg_stats WHERE tablename='guestbookresponse' and attname='eventtype') END) + END + FROM pg_class + WHERE oid = 'public.guestbookresponse'::regclass INTO estimatedsize; + + if estimatedsize = 0 then + SELECT COUNT(id) FROM guestbookresponse WHERE eventtype!= 'AccessRequest' INTO estimatedsize; + END if; + + RETURN estimatedsize; +END; +$$ LANGUAGE plpgsql IMMUTABLE; diff --git a/src/main/resources/db/migration/V6.0.0.2__9763-embargocitationdate.sql b/src/main/resources/db/migration/V6.0.0.2__9763-embargocitationdate.sql new file mode 100644 index 00000000000..536798015ba --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.2__9763-embargocitationdate.sql @@ -0,0 +1,14 @@ +-- An aggregated timestamp which is the latest of the availability dates of any embargoed files in the first published version, if present +ALTER TABLE dataset ADD COLUMN IF NOT EXISTS embargoCitationDate timestamp without time zone; +-- ... and an update query that will populate this column for all the published datasets with embargoed files in the first released version: +UPDATE dataset SET embargocitationdate=o.embargocitationdate +FROM (SELECT d.id, MAX(e.dateavailable) AS embargocitationdate +FROM embargo e, dataset d, datafile f, datasetversion v, filemetadata m +WHERE v.dataset_id = d.id +AND v.versionstate = 'RELEASED' +AND v.versionnumber = 1 +AND v.minorversionnumber = 0 +AND f.embargo_id = e.id +AND m.datasetversion_id = v.id +AND m.datafile_id = f.id GROUP BY d.id) o WHERE o.id = dataset.id; +-- (the query follows the logic that used to be in the method Dataset.getCitationDate() that calculated this adjusted date in real time). 
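For reference: the estimateGuestBookResponseTableSize() function created by the V6.0.0.1 migration above is meant to replace a straight "SELECT COUNT(*)" in the metrics display. A minimal way to exercise it by hand on a test database once the migration has run (the column alias is illustrative only; the second query mirrors the exact-count fallback the function itself uses when the estimate comes out as zero):

    SELECT estimateGuestBookResponseTableSize() AS estimated_download_responses;
    -- Exact (slow) count, for spot-checking the estimate:
    SELECT COUNT(id) FROM guestbookresponse WHERE eventtype != 'AccessRequest';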
diff --git a/src/main/resources/db/migration/V6.0.0.3__10095-guestbook-at-request2.sql b/src/main/resources/db/migration/V6.0.0.3__10095-guestbook-at-request2.sql new file mode 100644 index 00000000000..b6157e6a782 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.3__10095-guestbook-at-request2.sql @@ -0,0 +1,34 @@ +-- This creates a function that ESTIMATES the size of the +-- GuestbookResponse table (for the metrics display), instead +-- of relying on straight "SELECT COUNT(*) ..." +-- It uses statistics to estimate the number of guestbook entries +-- and the fraction of them related to downloads, +-- i.e. those that weren't created for 'AccessRequest' events. +-- Significant potential savings for an active installation. +-- See https://github.com/IQSS/dataverse/issues/8840 and +-- https://github.com/IQSS/dataverse/pull/8972 for more details + +CREATE OR REPLACE FUNCTION estimateGuestBookResponseTableSize() +RETURNS bigint AS $$ +DECLARE + estimatedsize bigint; +BEGIN + SELECT CASE WHEN relpages<10 THEN 0 + ELSE ((reltuples / relpages) + * (pg_relation_size('public.guestbookresponse') / current_setting('block_size')::int))::bigint + * (SELECT CASE WHEN ((select count(*) from pg_stats where tablename='guestbookresponse') = 0 + OR (select array_position(most_common_vals::text::text[], 'AccessRequest') + FROM pg_stats WHERE tablename='guestbookresponse' AND attname='eventtype') IS NULL) THEN 1 + ELSE 1 - (SELECT (most_common_freqs::text::text[])[array_position(most_common_vals::text::text[], 'AccessRequest')]::float + FROM pg_stats WHERE tablename='guestbookresponse' and attname='eventtype') END) + END + FROM pg_class + WHERE oid = 'public.guestbookresponse'::regclass INTO estimatedsize; + + if estimatedsize = 0 then + SELECT COUNT(id) FROM guestbookresponse WHERE eventtype!= 'AccessRequest' INTO estimatedsize; + END if; + + RETURN estimatedsize; +END; +$$ LANGUAGE plpgsql IMMUTABLE; diff --git a/src/main/resources/db/migration/V6.0.0.4__10093-privateurluser_id_update.sql b/src/main/resources/db/migration/V6.0.0.4__10093-privateurluser_id_update.sql new file mode 100644 index 00000000000..260f191f557 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.4__10093-privateurluser_id_update.sql @@ -0,0 +1 @@ + update roleassignment set assigneeidentifier=replace(assigneeidentifier, '#','!') where assigneeidentifier like '#%'; \ No newline at end of file diff --git a/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql b/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql new file mode 100644 index 00000000000..d6c067056ec --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql @@ -0,0 +1,91 @@ +-- The somewhat convoluted queries below populate the storage sizes for the entire +-- DvObject tree, fast. It IS possible to do it all with one recursive PostgreSQL +-- query that will crawl the tree from the leaves (DataFiles) up and add up the +-- sizes for all the Datasets/Collections above. Unfortunately, that appears to take +-- some hours on a database the size of the one at IQSS. So what we are doing +-- instead is to first compute the total sizes of all the *directly* linked objects, +-- with a couple of linear queries. This will correctly calculate the sizes of all the +-- Datasets (since they can only contain DataFiles, without any other hierarchy) and +-- those Collections that only contain Datasets; but not the sizes of Collections that +-- have sub-collections.
To take any sub-collections into account we will then run +-- a recursive query - but we only need to run it on the tree of Collections, +-- which makes it reasonably fast on any real life instance. +-- *Temporarily* add this "tempstoragesize" column to the DvObject table. +-- It will be used to calculate the storage sizes of all the DvObjectContainers +-- (Datasets and Collections), as a matter of convenience. Once calculated, the values +-- will be moved to the permanent StorageUse table. +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS tempStorageSize BIGINT; +-- First we calculate the storage size of each individual dataset (a simple sum +-- of the storage sizes of all the files in the dataset). +-- For datafiles, the storage size = main file size by default +-- (we are excluding any harvested files and datasets): +UPDATE dvobject SET tempStorageSize=o.combinedStorageSize +FROM (SELECT datasetobject.id, SUM(file.filesize) AS combinedStorageSize +FROM dvobject fileobject, dataset datasetobject, datafile file +WHERE fileobject.owner_id = datasetobject.id +AND fileobject.id = file.id +AND datasetobject.harvestingclient_id IS null +GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; + +-- ... but for ingested tabular files the size of the saved original needs to be added, since +-- those also take space: +-- (should be safe to assume that there are no *harvested ingested* files) +UPDATE dvobject SET tempStorageSize=tempStorageSize+o.combinedStorageSize +FROM (SELECT datasetobject.id, COALESCE(SUM(dt.originalFileSize),0) AS combinedStorageSize +FROM dvobject fileobject, dvobject datasetobject, datafile file, datatable dt +WHERE fileobject.owner_id = datasetobject.id +AND fileobject.id = file.id +AND dt.datafile_id = file.id +GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; + +-- there may also be some auxiliary files registered in the database, such as +-- the content generated and deposited by external tools - diff. privacy stats +-- being one example. These are also considered the "payload" files that +-- we want to count for the purposes of calculating storage use. +UPDATE dvobject SET tempStorageSize=tempStorageSize+o.combinedStorageSize +FROM (SELECT datasetobject.id, COALESCE(SUM(aux.fileSize),0) AS combinedStorageSize +FROM dvobject fileobject, dvobject datasetobject, datafile file, auxiliaryFile aux +WHERE fileobject.owner_id = datasetobject.id +AND fileobject.id = file.id +AND aux.datafile_id = file.id +GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; + + +-- ...
and then we can repeat the same for collections, by setting the storage size +-- to the sum of the storage sizes of the datasets *directly* in each collection: +-- (no attempt is made yet to recursively count the sizes of all the child sub-collections) +UPDATE dvobject SET tempStorageSize=o.combinedStorageSize +FROM (SELECT collectionobject.id, SUM(datasetobject.tempStorageSize) AS combinedStorageSize +FROM dvobject datasetobject, dvobject collectionobject +WHERE datasetobject.owner_id = collectionobject.id +AND datasetobject.tempStorageSize IS NOT null +GROUP BY collectionobject.id) o WHERE o.id = dvobject.id AND dvobject.dtype='Dataverse'; + +-- And now we will update the storage sizes of all the Collection ("Dataverse") objects +-- that contain sub-collections, *recursively*, to add their sizes to the totals: +WITH RECURSIVE treestorage (id, owner_id, tempStorageSize, dtype) AS +( + -- All dataverses: + SELECT id, owner_id, tempStorageSize, dtype + FROM dvobject + WHERE dtype = 'Dataverse' + + UNION ALL + + -- Recursive Member: + SELECT dvobject.id, treestorage.owner_id, dvobject.tempStorageSize, treestorage.dtype + FROM treestorage, dvobject + WHERE treestorage.id = dvobject.owner_id + AND dvobject.dtype = 'Dataverse' +) +UPDATE dvobject SET tempStorageSize=tempStorageSize+(SELECT COALESCE(SUM(tempStorageSize),0) +FROM treestorage WHERE owner_id=dvobject.id) +WHERE dvobject.dtype = 'Dataverse' +AND dvobject.id IN (SELECT owner_id FROM treestorage WHERE owner_id IS NOT null); + +-- And, finally, we can move these calculated storage sizes of datasets and +-- collections to the dedicated new table StorageUse: +INSERT INTO storageuse (dvobjectcontainer_id,sizeinbytes) (SELECT id, tempstoragesize FROM dvobject WHERE dtype = 'Dataverse'); +INSERT INTO storageuse (dvobjectcontainer_id,sizeinbytes) (SELECT d.id, o.tempstoragesize FROM dvobject o, dataset d WHERE o.id = d.id AND d.harvestingclient_id IS NULL); +-- ...
and drop the temporary column we added to DvObject earlier: +ALTER TABLE dvobject DROP column tempStorageSize diff --git a/src/main/resources/db/migration/V6.0.0.6__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V6.0.0.6__9506-track-thumb-failures.sql new file mode 100644 index 00000000000..156960d2011 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.6__9506-track-thumb-failures.sql @@ -0,0 +1 @@ +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS previewimagefail BOOLEAN DEFAULT FALSE; diff --git a/src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql b/src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql new file mode 100644 index 00000000000..8e311c06b32 --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql @@ -0,0 +1,2 @@ +-- increase field universe from 255 to text +ALTER TABLE variablemetadata ALTER COLUMN universe TYPE text; diff --git a/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql b/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql new file mode 100644 index 00000000000..7c52a00107a --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql @@ -0,0 +1 @@ +ALTER TABLE datatable ADD COLUMN IF NOT EXISTS storedWithVariableHeader BOOLEAN DEFAULT FALSE; diff --git a/src/main/resources/db/migration/V6.1.0.3__9983-missing-unique-constraints.sql b/src/main/resources/db/migration/V6.1.0.3__9983-missing-unique-constraints.sql new file mode 100644 index 00000000000..6cb3a455e4e --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.3__9983-missing-unique-constraints.sql @@ -0,0 +1,16 @@ +DO $$ +BEGIN + + BEGIN + ALTER TABLE externalvocabularyvalue ADD CONSTRAINT externalvocabularvalue_uri_key UNIQUE(uri); + EXCEPTION + WHEN duplicate_table THEN RAISE NOTICE 'Table unique constraint externalvocabularvalue_uri_key already exists'; + END; + + BEGIN + ALTER TABLE oaiset ADD CONSTRAINT oaiset_spec_key UNIQUE(spec); + EXCEPTION + WHEN duplicate_table THEN RAISE NOTICE 'Table unique constraint oaiset_spec_key already exists'; + END; + +END $$; \ No newline at end of file diff --git a/src/main/resources/db/migration/V6.1.0.4__5645-geospatial-fieldname-fix.sql b/src/main/resources/db/migration/V6.1.0.4__5645-geospatial-fieldname-fix.sql new file mode 100644 index 00000000000..2ab8cbc802e --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.4__5645-geospatial-fieldname-fix.sql @@ -0,0 +1,7 @@ +UPDATE datasetfieldtype +SET name = 'northLatitude' +WHERE name = 'northLongitude'; + +UPDATE datasetfieldtype +SET name = 'southLatitude' +WHERE name = 'southLongitude'; \ No newline at end of file diff --git a/src/main/resources/db/migration/V6.1.0.5__3623-multiple-pid-providers.sql b/src/main/resources/db/migration/V6.1.0.5__3623-multiple-pid-providers.sql new file mode 100644 index 00000000000..1d11e178abf --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.5__3623-multiple-pid-providers.sql @@ -0,0 +1,2 @@ +ALTER TABLE dataverse ADD COLUMN IF NOT EXISTS pidgeneratorspecs TEXT; +ALTER TABLE dataset ADD COLUMN IF NOT EXISTS pidgeneratorspecs TEXT; diff --git a/src/main/resources/edu/harvard/iq/dataverse/datacite_metadata_template.xml b/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml similarity index 100% rename from src/main/resources/edu/harvard/iq/dataverse/datacite_metadata_template.xml rename to 
src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml diff --git a/src/main/webapp/WEB-INF/glassfish-web.xml b/src/main/webapp/WEB-INF/glassfish-web.xml index e56d7013abf..015a309fd6b 100644 --- a/src/main/webapp/WEB-INF/glassfish-web.xml +++ b/src/main/webapp/WEB-INF/glassfish-web.xml @@ -10,13 +10,18 @@ - - - - + + + + + + + diff --git a/src/main/webapp/contactFormFragment.xhtml b/src/main/webapp/contactFormFragment.xhtml index cb4eb3d0872..470a137e6cf 100644 --- a/src/main/webapp/contactFormFragment.xhtml +++ b/src/main/webapp/contactFormFragment.xhtml @@ -37,7 +37,7 @@

          + validator="#{sendFeedbackDialog.validateUserEmail}" validatorMessage="#{bundle['contact.from.invalid']}" requiredMessage="#{bundle['contact.from.required']}" required="#{param['DO_VALIDATION']}" placeholder="#{bundle['contact.from.emailPlaceholder']}"/>
          @@ -81,7 +81,7 @@
          + update="@form,messagePanel" oncomplete="if (args && !args.validationFailed) PF('contactForm').hide();" actionListener="#{sendFeedbackDialog.sendMessage}">
          -
          +
           
          -
          +
          @@ -267,7 +270,7 @@
          -
          +
          -
          +
          -

          +

          -

          +

          diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 67dcf89c380..34c6d3dcbea 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -178,11 +178,12 @@
        • + oncomplete="showPopup(false);"> #{bundle.download} +
        • @@ -191,12 +192,13 @@
        • #{bundle.downloadOriginal} +
        • @@ -206,7 +208,7 @@
        • - #{bundle.downloadArchival} @@ -228,9 +230,14 @@
        • - - - + + #{bundle.transfer} + + + + +
        • @@ -453,6 +460,17 @@ + + + + +
        • + + + +
        • +
          +
        • @@ -538,7 +556,7 @@
          - + @@ -560,14 +578,14 @@ data-toggle="tooltip" data-placement="auto top" data-original-title="#{bundle['metrics.dataset.downloads.makedatacount.tip']}"> - + - )
          -
          +
          @@ -990,6 +1008,19 @@
          + +

          + +

          +
          + + + + +
          +

          @@ -1059,16 +1090,38 @@

          #{bundle['dataset.downloadUnrestricted']}

          + rendered="#{DatasetPage.guestbookAndTermsPopupRequired and !settingsWrapper.rsyncDownload}" + oncomplete="PF('guestbookAndTermsPopup').show();" />
          + +

          #{bundle['dataset.mixedSelectedFilesForTransfer']}

          + + + + + + +
          #{resFile.label}
          +
          +

          #{bundle['dataset.transferUnrestricted']}

          + + + +
          +

          #{bundle['file.deleteDialog.tip']}

          @@ -1168,7 +1221,12 @@ -

          #{bundle['file.deaccessionDialog.tip']}

          +
          +   +


          + +

          +
          - + - + - + + + + + + @@ -1541,19 +1604,11 @@ - + - - - - - - - -
          @@ -1600,6 +1655,12 @@
          +
          + +   + + +
          #{bundle['dataset.rejectMessage']}

          - - - - -

          - -

          - -
          + + +

          + +

          + + +
          - + + +
          - + @@ -1883,10 +1947,14 @@ $('button[id$="updateOwnerDataverse"]').trigger('click'); } - function showPopup() { + function showPopup(isTransfer) { var outcome = document.getElementById("datasetForm:validateFilesOutcome").value; if (outcome ==='Mixed'){ - PF('downloadMixed').show(); + if(isTransfer) { + PF('globusTransferMixed').show(); + } else { + PF('downloadMixed').show(); + } } if (outcome ==='FailEmpty'){ PF('selectFilesForDownload').show(); @@ -1898,7 +1966,7 @@ PF('downloadInvalid').show(); } if (outcome ==='GuestbookRequired'){ - PF('downloadPopup').show(); + PF('guestbookAndTermsPopup').show(); } } diff --git a/src/main/webapp/dataverse.xhtml b/src/main/webapp/dataverse.xhtml index aa3fa535807..7f70f28e194 100644 --- a/src/main/webapp/dataverse.xhtml +++ b/src/main/webapp/dataverse.xhtml @@ -252,6 +252,8 @@
          +
          +
          #{bundle.description}
          +
          +
          + + #{bundle.guestbookEntryOption} + + +
          + + + + +
          +
          +
          + + #{bundle.pidProviderOption} + + +
          + + + + +
          +
          +

        • @@ -441,7 +471,7 @@
          - +
          diff --git a/src/main/webapp/dataverseuser.xhtml b/src/main/webapp/dataverseuser.xhtml index 51f5bfa9f8a..9ed8b5209b6 100644 --- a/src/main/webapp/dataverseuser.xhtml +++ b/src/main/webapp/dataverseuser.xhtml @@ -178,9 +178,6 @@ #{DataverseUserPage.getRequestorEmail(item)} - - #{DataverseUserPage.getReasonForReturn(item.theObject)} - @@ -236,6 +233,14 @@ + + + + + #{item.theObject.displayName} + + + diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 5fac8241f13..6fab335c0f3 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -91,6 +91,11 @@ rendered="#{!EditDatafilesPage.isUnlimitedUploadFileSize()}"> + + + + @@ -154,7 +159,7 @@ dragDropSupport="true" auto="#{!(systemConfig.directUploadEnabled(EditDatafilesPage.dataset))}" multiple="#{datasetPage || EditDatafilesPage.allowMultipleFileUpload()}" - disabled="#{lockedFromEdits || !(datasetPage || EditDatafilesPage.showFileUploadComponent()) }" + disabled="#{lockedFromEdits || !(datasetPage || EditDatafilesPage.showFileUploadComponent()) || EditDatafilesPage.isQuotaExceeded()}" listener="#{EditDatafilesPage.handleFileUpload}" process="filesTable" update=":datasetForm:filesTable, @([id$=filesButtons])" @@ -166,6 +171,7 @@ fileLimit="#{EditDatafilesPage.getMaxNumberOfFiles()}" invalidSizeMessage="#{bundle['file.edit.error.file_exceeds_limit']}" sequential="true" + previewWidth="-1" widgetVar="fileUploadWidget"> @@ -371,13 +377,13 @@
          - - + #{fileMetadata.label} diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index f28efc47705..9c29fd777a1 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -24,14 +24,15 @@ - - - #{fileMetadata.dataFile.containsFileAccessRequestFromUser(dataverseSession.user) ? bundle['file.accessRequested'] : bundle['file.requestAccess']} + disabled="#{fileMetadata.dataFile.containsActiveFileAccessRequestFromUser(dataverseSession.user)}"> + + #{fileMetadata.dataFile.containsActiveFileAccessRequestFromUser(dataverseSession.user) ? bundle['file.accessRequested'] : bundle['file.requestAccess']}
        • @@ -59,8 +60,8 @@ -
        • - + #{bundle['file.globus.of']} #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} - + update="@widgetVar(guestbookAndTermsPopup)" oncomplete="PF('guestbookAndTermsPopup').show();handleResizeDialog('guestbookAndTermsPopup');"> + - GT: #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} + #{bundle['file.globus.of']} #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType}
        • - #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} - + update="@widgetVar(guestbookAndTermsPopup)" oncomplete="PF('guestbookAndTermsPopup').show();handleResizeDialog('guestbookAndTermsPopup');"> + #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} - #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} - + update="@widgetVar(guestbookAndTermsPopup)" oncomplete="PF('guestbookAndTermsPopup').show();handleResizeDialog('guestbookAndTermsPopup');"> + #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} @@ -134,23 +138,24 @@
        • - #{bundle['file.downloadBtn.format.all']} - + update="@widgetVar(guestbookAndTermsPopup)" + oncomplete="PF('guestbookAndTermsPopup').show();handleResizeDialog('guestbookAndTermsPopup');"> + #{bundle['file.downloadBtn.format.all']}
        • - @@ -158,12 +163,13 @@ - + update="@widgetVar(guestbookAndTermsPopup)" + oncomplete="PF('guestbookAndTermsPopup').show();handleResizeDialog('guestbookAndTermsPopup');"> + @@ -171,35 +177,37 @@
        • - #{bundle['file.downloadBtn.format.tab']} - + update="@widgetVar(guestbookAndTermsPopup)" + oncomplete="PF('guestbookAndTermsPopup').show();handleResizeDialog('guestbookAndTermsPopup');"> + #{bundle['file.downloadBtn.format.tab']}
        • - #{bundle['file.downloadBtn.format.rdata']} - + update="@widgetVar(guestbookAndTermsPopup)" + oncomplete="PF('guestbookAndTermsPopup').show();handleResizeDialog('guestbookAndTermsPopup');"> + #{bundle['file.downloadBtn.format.rdata']} @@ -215,18 +223,19 @@
        • - #{bundle['file.downloadBtn.format.var']} - + update="@widgetVar(guestbookAndTermsPopup)" + oncomplete="PF('guestbookAndTermsPopup').show();handleResizeDialog('guestbookAndTermsPopup');"> + #{bundle['file.downloadBtn.format.var']}
        • @@ -303,20 +312,21 @@
        • - #{tool.getDisplayNameLang()} - + update="@widgetVar(guestbookAndTermsPopup)" + oncomplete="PF('guestbookAndTermsPopup').show();handleResizeDialog('guestbookAndTermsPopup');"> + #{tool.getDisplayNameLang()}
        • diff --git a/src/main/webapp/file-edit-button-fragment.xhtml b/src/main/webapp/file-edit-button-fragment.xhtml index 4dac1613266..30c3f6e7938 100644 --- a/src/main/webapp/file-edit-button-fragment.xhtml +++ b/src/main/webapp/file-edit-button-fragment.xhtml @@ -77,6 +77,22 @@
          + + + +
        • + + + +
        • +
          + +
        • + + + +
        • +
          diff --git a/src/main/webapp/file-info-fragment.xhtml b/src/main/webapp/file-info-fragment.xhtml index 33a8d2c3ca5..72fe279fbf8 100644 --- a/src/main/webapp/file-info-fragment.xhtml +++ b/src/main/webapp/file-info-fragment.xhtml @@ -28,8 +28,8 @@
          - - + @@ -67,7 +67,7 @@
          - +
          diff --git a/src/main/webapp/file-request-access-popup-fragment.xhtml b/src/main/webapp/file-request-access-popup-fragment.xhtml deleted file mode 100644 index 6541d86b686..00000000000 --- a/src/main/webapp/file-request-access-popup-fragment.xhtml +++ /dev/null @@ -1,53 +0,0 @@ - - - -

-        #{someActivelyEmbargoedFiles ? bundle['file.requestAccessTermsDialog.embargoed.tip'] : bundle['file.requestAccessTermsDialog.tip']}
-        #{bundle['file.requestAccessTermsDialog.embargoed']}
diff --git a/src/main/webapp/file.xhtml b/src/main/webapp/file.xhtml
index 812786be781..bcd48fd1f32 100644
--- a/src/main/webapp/file.xhtml
+++ b/src/main/webapp/file.xhtml
@@ -204,7 +204,7 @@
                          or FilePage.fileMetadata.dataFile.filePackage and systemConfig.HTTPDownload}">
@@ -214,6 +214,7 @@
@@ -297,7 +298,7 @@
@@ -305,7 +306,7 @@
@@ -353,20 +354,22 @@
+                update="@widgetVar(guestbookAndTermsPopup)"
+                oncomplete="PF('guestbookAndTermsPopup').show();handleResizeDialog('guestbookAndTermsPopup');">
@@ -629,46 +635,31 @@

-            #{bundle['file.mapData.unpublished.message']}

        -
-
-
        -
        +
        @@ -683,7 +674,7 @@

        -
diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml
index 6d3c6062ec7..58899ab7062 100644
--- a/src/main/webapp/filesFragment.xhtml
+++ b/src/main/webapp/filesFragment.xhtml
@@ -436,53 +436,68 @@
+                                   and !(DatasetPage.isVersionHasTabular()||DatasetPage.isVersionHasGlobus())}">
+                               update="@form" oncomplete="showPopup(false);">
+                        #{bundle.download}
        -
        + and (DatasetPage.isVersionHasTabular()||DatasetPage.isVersionHasGlobus())}">
        + styleClass="btn btn-default btn-request" + update="@form, @([id$=messagePanel])" + action="#{DatasetPage.validateFilesForRequestAccess()}" + disabled="#{DatasetPage.locked or !DatasetPage.fileAccessRequestMultiButtonEnabled}"> + #{bundle['file.requestAccess']} -
        +
        #{bundle['file.accessRequested']} 
@@ -548,15 +565,17 @@
diff --git a/src/main/webapp/globus.xhtml b/src/main/webapp/globus.xhtml
deleted file mode 100644
index f4eebd4babf..00000000000
--- a/src/main/webapp/globus.xhtml
+++ /dev/null
@@ -1,30 +0,0 @@
diff --git a/src/main/webapp/file-download-popup-fragment.xhtml b/src/main/webapp/guestbook-terms-popup-fragment.xhtml
similarity index 83%
rename from src/main/webapp/file-download-popup-fragment.xhtml
rename to src/main/webapp/guestbook-terms-popup-fragment.xhtml
index 632c2a827ef..5948047d845 100644
--- a/src/main/webapp/file-download-popup-fragment.xhtml
+++ b/src/main/webapp/guestbook-terms-popup-fragment.xhtml
@@ -7,22 +7,24 @@
                xmlns:o="http://omnifaces.org/ui"
                xmlns:jsf="http://xmlns.jcp.org/jsf"
                xmlns:iqbs="http://xmlns.jcp.org/jsf/composite/iqbs">

-            #{bundle['file.downloadDialog.tip']}

-
-
-
-
-
-
+

+            #{someActivelyEmbargoedFiles ? bundle['file.requestAccessTermsDialog.embargoed.tip'] : bundle['file.requestAccessTermsDialog.tip']}

        +

+            #{bundle['file.requestAccessTermsDialog.embargoed']}

        +
+
+

+            #{bundle['file.downloadDialog.tip']}

        +
        -
        @@ -53,7 +55,6 @@

        -
-
\ No newline at end of file
+
diff --git a/src/main/webapp/manage-templates.xhtml b/src/main/webapp/manage-templates.xhtml
index c9841ace8e8..879cf9e55c2 100644
--- a/src/main/webapp/manage-templates.xhtml
+++ b/src/main/webapp/manage-templates.xhtml
@@ -139,7 +139,7 @@
+                           styleClass="largePopUp" widgetVar="deleteConfirmation" modal="true" focus="contDeleteTemplateBtn">

         

diff --git a/src/main/webapp/permissions-manage-files.xhtml b/src/main/webapp/permissions-manage-files.xhtml
index d3109da69a6..4e4e56f2051 100644
--- a/src/main/webapp/permissions-manage-files.xhtml
+++ b/src/main/webapp/permissions-manage-files.xhtml
@@ -322,7 +322,7 @@
diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css
index 470c07d4534..b81cf2a2c47 100644
--- a/src/main/webapp/resources/css/structure.css
+++ b/src/main/webapp/resources/css/structure.css
@@ -483,7 +483,7 @@
 span.search-term-match {font-weight: bold;}
 [id$='resultsTable'] div.card-title-icon-block span.label {vertical-align:15%}
 [id$='resultsTable'] div.card-preview-icon-block {width:48px; float:left; margin:4px 12px 6px 0;}
 [id$='resultsTable'] div.card-preview-icon-block a {display:block; height:48px; line-height:48px;}
-[id$='resultsTable'] div.card-preview-icon-block img {vertical-align:middle;}
+[id$='resultsTable'] div.card-preview-icon-block img {vertical-align:middle; max-width: 64px; max-height: 48px; padding-right: 10px;}
 [id$='resultsTable'] div.card-preview-icon-block span[class^='icon'],
 [id$='resultsTable'] div.card-preview-icon-block span[class^='glyphicon'] {font-size:2.8em;}
diff --git a/src/main/webapp/roles-assign.xhtml b/src/main/webapp/roles-assign.xhtml
index 4b31f10dbfc..93b9862c55d 100644
--- a/src/main/webapp/roles-assign.xhtml
+++ b/src/main/webapp/roles-assign.xhtml
@@ -31,7 +31,7 @@
                              styleClass="DropdownPopup" panelStyleClass="DropdownPopupPanel"
                              var="roleAssignee" itemLabel="#{roleAssignee.displayInfo.title}" itemValue="#{roleAssignee}" converter="roleAssigneeConverter">
diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml
index 718df813348..af568170157 100644
--- a/src/main/webapp/search-include-fragment.xhtml
+++ b/src/main/webapp/search-include-fragment.xhtml
@@ -36,7 +36,7 @@
@@ -88,15 +88,26 @@
+
+
+
+
        +
        +
+
+
        +
        +
        +
        -
        +