diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index cf982992c03..b297dfc4ee8 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,11 +1,13 @@ --- name: Bug report -about: Did you encounter something unexpected or incorrect in the Dataverse software? We'd like to hear about it! +about: Did you encounter something unexpected or incorrect in the Dataverse software? + We'd like to hear about it! title: '' labels: '' assignees: '' --- + + + + + + + diff --git a/doc/JAVADOC_GUIDE.md b/doc/JAVADOC_GUIDE.md index 8001abda248..997c40e1624 100644 --- a/doc/JAVADOC_GUIDE.md +++ b/doc/JAVADOC_GUIDE.md @@ -88,7 +88,7 @@ Here's a better approach: /** The dataverse we move the dataset from */ private Dataverse sourceDataverse; - /** The dataverse we movet the dataset to */ + /** The dataverse we move the dataset to */ private Dataverse destinationDataverse; diff --git a/doc/mergeParty/readme.md b/doc/mergeParty/readme.md index 061673fffa0..6f3af8511dc 100644 --- a/doc/mergeParty/readme.md +++ b/doc/mergeParty/readme.md @@ -73,10 +73,10 @@ Note that before we were asking `isGuest` and now we ask `isAuthenticated`, so t ## Other Added Things ### Settings bean -Settings (in `edu.harvard.iq.dataverse.settings`) are where the application stores its more complex, admin-editable configuration. Technically, its a persistent `Map`, that can be accessed via API (`edu.harvard.iq.dataverse.api.Admin`, on path `{server}/api/s/settings`). Currenly used for the signup mechanism. +Settings (in `edu.harvard.iq.dataverse.settings`) are where the application stores its more complex, admin-editable configuration. Technically, its a persistent `Map`, that can be accessed via API (`edu.harvard.iq.dataverse.api.Admin`, on path `{server}/api/s/settings`). Currently used for the signup mechanism. 
### Admin API -Accessible under url `{server}/api/s/`, API calls to this bean should be editing confugurations, allowing full indexing and more. The idea behing putting all of them under the `/s/` path is that we can later block these calls using a filter. This way, we could, say, allow access from localhost only. Or, we could block this completely based on some environemnt variable. +Accessible under url `{server}/api/s/`, API calls to this bean should be editing configurations, allowing full indexing and more. The idea behind putting all of them under the `/s/` path is that we can later block these calls using a filter. This way, we could, say, allow access from localhost only. Or, we could block this completely based on some environment variable. ### `setup-all.sh` script A new script that sets up the users and the dataverses, sets the system up for built-in signup, and then indexes the dataverses using solr. Requires the [jq utility](http://stedolan.github.io/jq/). On Macs with [homebrew](http://brew.sh) installed, getting this utility is a `brew install jq` command away. diff --git a/doc/release-notes/1249-collapse_dataverse_description.md b/doc/release-notes/1249-collapse_dataverse_description.md new file mode 100644 index 00000000000..8fe933005de --- /dev/null +++ b/doc/release-notes/1249-collapse_dataverse_description.md @@ -0,0 +1 @@ +Long descriptions for collections are now truncated but can be expanded to read the full description. diff --git a/doc/release-notes/5.13-release-notes.md b/doc/release-notes/5.13-release-notes.md new file mode 100644 index 00000000000..0463b7d18a3 --- /dev/null +++ b/doc/release-notes/5.13-release-notes.md @@ -0,0 +1,262 @@ +# Dataverse Software 5.13 + +This release brings new features, enhancements, and bug fixes to the Dataverse software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. 
+ +## Release Highlights + +### Schema.org Improvements (Some Backward Incompatibility) + +The Schema.org metadata used as an export format and also embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations for Google Dataset Search. + +Please be advised that these improvements have the chance to break integrations that rely on the old, less compliant structure. For details see the "backward incompatibility" section below. (Issue #7349) + +### Folder Uploads via Web UI (dvwebloader, S3 only) + +For installations using S3 for storage and with direct upload enabled, a new tool called [DVWebloader](https://github.com/gdcc/dvwebloader) can be enabled that allows web users to upload a folder with a hierarchy of files and subfolders while retaining the relative paths of files (similarly to how the DVUploader tool does it on the command line, but with the convenience of using the browser UI). See [Folder Upload](https://guides.dataverse.org/en/5.13/user/dataset-management.html#folder-upload) in the User Guide for details. (PR #9096) + +### Long Descriptions of Collections (Dataverses) are Now Truncated + +Like datasets, long descriptions of collections (dataverses) are now truncated by default but can be expanded with a "read full description" button. (PR #9222) + +### License Sorting + +Licenses as shown in the dropdown in the UI can now be sorted by superusers. See the [Sorting Licenses](https://guides.dataverse.org/en/5.13/installation/config.html#sorting-licenses) section of the Installation Guide for details. (PR #8697) + +### Metadata Field Production Location Now Repeatable, Facetable, and Enabled for Advanced Search + +Depositors can now click the plus sign to enter multiple instances of the metadata field "Production Location" in the citation metadata block. Additionally this field now appears on the Advanced Search page and can be added to the list of search facets. 
(PR #9254) + +### Support for NetCDF and HDF5 Files + + NetCDF and HDF5 files are now detected based on their content rather than just their file extension. Both "classic" NetCDF 3 files and more modern NetCDF 4 files are detected based on content. Detection for older HDF4 files is only done through the file extension ".hdf", as before. + +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML (XML) format and save it as an auxiliary file. There is a new NcML previewer available in the [dataverse-previewers](https://github.com/gdcc/dataverse-previewers) repo. + +An [extractNcml](https://guides.dataverse.org/en/5.13/api/native-api.html#extract-ncml) API endpoint has been added, especially for installations with existing NetCDF and HDF5 files. After upgrading, they can iterate through these files and try to extract an NcML file. + +See the [NetCDF and HDF5](https://guides.dataverse.org/en/5.13/user/dataset-management.html#netcdf-and-hdf5) section of the User Guide for details. (PR #9239) + +### Support for .eln Files (Electronic Laboratory Notebooks) + +The [.eln file format](https://github.com/TheELNConsortium/TheELNFileFormat) is used by Electronic Laboratory Notebooks as an exchange format for experimental protocols, results, sample descriptions, etc... + +### Improved Security for External Tools + +External tools can now be configured to use signed URLs to access the Dataverse API as an alternative to API tokens. This eliminates the need for tools to have access to the user's API token in order to access draft or restricted datasets and datafiles. Signed URLs can be transferred via POST or via a callback when triggering a tool via GET. See [Authorization Options](https://guides.dataverse.org/en/5.13/api/external-tools.html#authorization-options) in the External Tools documentation for details. 
(PR #9001) + +### Geospatial Search (API Only) + +Geospatial search is supported via the Search API using two new [parameters](https://guides.dataverse.org/en/5.13/api/search.html#parameters): `geo_point` and `geo_radius`. + +The fields that are geospatially indexed are "West Longitude", "East Longitude", "North Latitude", and "South Latitude" from the "Geographic Bounding Box" field in the geospatial metadata block. (PR #8239) + +### Reproducibility and Code Execution with Binder + +Binder has been added to the list of external tools that can be added to a Dataverse installation. From the dataset page, you can launch Binder, which spins up a computational environment in which you can explore the code and data in the dataset, or write new code, such as a Jupyter notebook. (PR #9341) + +### CodeMeta (Software) Metadata Support (Experimental) + +Experimental support for research software metadata deposits has been added. + +By adding a metadata block for [CodeMeta](https://codemeta.github.io), we take another step toward adding first class support of diverse FAIR objects, such as research software and computational workflows. + +There is more work underway to make Dataverse installations around the world "research software ready." + +**Note:** Like the metadata block for computational workflows before, CodeMeta is listed under [Experimental Metadata](https://guides.dataverse.org/en/5.13/user/appendix.html#experimental-metadata) in the guides. Experimental means it's brand new, opt-in, and might need future tweaking based on experience of usage in the field. We hope for feedback from installations on the new metadata block to optimize and lift it from the experimental stage. (PR #7877) + +### Mechanism Added for Stopping a Harvest in Progress + +It is now possible for a sysadmin to stop a long-running harvesting job. 
See [Harvesting Clients](https://guides.dataverse.org/en/5.13/admin/harvestclients.html#how-to-stop-a-harvesting-run-in-progress) in the Admin Guide for more information. (PR #9187) + +### API Endpoint Listing Metadata Block Details has been Extended + +The API endpoint `/api/metadatablocks/{block_id}` has been extended to include the following fields: + +- `controlledVocabularyValues` - All possible values for fields with a controlled vocabulary. For example, the values "Agricultural Sciences", "Arts and Humanities", etc. for the "Subject" field. +- `isControlledVocabulary`: Whether or not this field has a controlled vocabulary. +- `multiple`: Whether or not the field supports multiple values. + +See [Metadata Blocks](https://guides.dataverse.org/en/5.13/api/native-api.html#metadata-blocks-api) in the API Guide for details. (PR #9213) + +### Advanced Database Settings + +You can now enable advanced database connection pool configurations useful for debugging and monitoring as well as other settings. Of particular interest may be `sslmode=require`. See the new [Database Persistence](https://guides.dataverse.org/en/5.13/installation/config.html#database-persistence) section of the Installation Guide for details. (PR #8915) + +### Support for Cleaning up Leftover Files in Dataset Storage + +Experimental feature: the leftover files stored in the Dataset storage location that are not in the file list of that Dataset, but are named following the Dataverse technical convention for dataset files, can be removed with the new [Cleanup Storage of a Dataset](https://guides.dataverse.org/en/5.13/api/native-api.html#cleanup-storage-of-a-dataset) API endpoint. + +### OAI Server Bug Fixed + +A bug introduced in 5.12 was preventing the Dataverse OAI server from serving incremental harvesting requests from clients. It was fixed in this release (PR #9316). 
+ +## Major Use Cases and Infrastructure Enhancements + +Changes and fixes in this release not already mentioned above include: + +- Administrators can configure an alternative storage location where files uploaded via the UI are temporarily stored during the transfer from client to server. (PR #8983, See also [Configuration Guide](http://guides.dataverse.org/en/5.13/installation/config.html#temporary-upload-file-storage)) +- To improve performance, Dataverse estimates download counts. This release includes an update that makes the estimate more accurate. (PR #8972) +- Direct upload and out-of-band uploads can now be used to replace multiple files with one API call (complementing the prior ability to add multiple new files). (PR #9018) +- A persistent identifier, [CSTR](https://www.cstr.cn/search/specification/), is added to the Related Publication field's ID Type child field. For datasets published with CSTR IDs, Dataverse will also include them in the datasets' Schema.org metadata exports. (Issue #8838) +- Datasets that are part of linked dataverse collections will now be displayed in their linking dataverse collections. 
+ +## New JVM Options and MicroProfile Config Options + +The following JVM option is now available: + +- `dataverse.personOrOrg.assumeCommaInPersonName` - the default is false + +The following MicroProfile Config options are now available (these can be treated as JVM options): + +- `dataverse.files.uploads` - alternative storage location of generated temporary files for UI file uploads +- `dataverse.api.signing-secret` - used by signed URLs +- `dataverse.solr.host` +- `dataverse.solr.port` +- `dataverse.solr.protocol` +- `dataverse.solr.core` +- `dataverse.solr.path` +- `dataverse.rserve.host` + +The following existing JVM options are now available via MicroProfile Config: + +- `dataverse.siteUrl` +- `dataverse.fqdn` +- `dataverse.files.directory` +- `dataverse.rserve.host` +- `dataverse.rserve.port` +- `dataverse.rserve.user` +- `dataverse.rserve.password` +- `dataverse.rserve.tempdir` + +## Notes for Developers and Integrators + +See the "Backward Incompatibilities" section below. + +## Backward Incompatibilities + +### Schema.org + +The following changes have been made to Schema.org exports (necessary for the improvements mentioned above): + +- Descriptions are now joined and truncated to less than 5K characters. +- The "citation"/"text" key has been replaced by a "citation"/"name" key. +- File entries now have the mimetype reported as 'encodingFormat' rather than 'fileFormat' to better conform with the Schema.org specification for DataDownload entries. Download URLs are now sent for all files unless the dataverse.files.hide-schema-dot-org-download-urls setting is set to true. +- Author/creators now have an @type of Person or Organization and any affiliation (affiliation for Person, parentOrganization for Organization) is now an object of @type Organization + +### License Files + +License files are now required to contain the new "sortOrder" column. When attempting to create a new license without this field, an error would be returned. 
See [Configuring Licenses](https://guides.dataverse.org/en/5.13/installation/config.html#configuring-licenses) section of the Installation Guide for reference. + +## Complete List of Changes + +For the complete list of code changes in this release, see the [5.13 milestone](https://github.com/IQSS/dataverse/milestone/107?closed=1) on GitHub. + +## Installation + +If this is a new installation, please see our [Installation Guide](https://guides.dataverse.org/en/5.13/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/5.13/installation/intro.html#getting-help) if you need it! + +After your installation has gone into production, you are welcome to add it to our [map of installations](https://dataverse.org/installations) by opening an issue in the [dataverse-installations](https://github.com/IQSS/dataverse-installations) repo. + +## Upgrade Instructions + +0\. These instructions assume that you've already successfully upgraded from version 4.x to 5.0 of the Dataverse software following the instructions in the [release notes for version 5.0](https://github.com/IQSS/dataverse/releases/tag/v5.0). After upgrading from the 4.x series to 5.0, you should progress through the other 5.x releases before attempting the upgrade to 5.13. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +In the following commands we assume that Payara 5 is installed in `/usr/local/payara5`. If not, adjust as needed. + +`export PAYARA=/usr/local/payara5` + +(or `setenv PAYARA /usr/local/payara5` if you are using a `csh`-like shell) + +1\. Undeploy the previous version. + +- `$PAYARA/bin/asadmin list-applications` +- `$PAYARA/bin/asadmin undeploy dataverse<-version>` + +2\. 
Stop Payara and remove the generated directory + +- `service payara stop` +- `rm -rf $PAYARA/glassfish/domains/domain1/generated` + +3\. Start Payara + +- `service payara start` + +4\. Deploy this version. + +- `$PAYARA/bin/asadmin deploy dataverse-5.13.war` + +5\. Restart Payara + +- `service payara stop` +- `service payara start` + +6\. Reload citation metadata block + +- `wget https://github.com/IQSS/dataverse/releases/download/v5.13/citation.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @citation.tsv -H "Content-type: text/tab-separated-values"` + +If you are running an English-only installation, you are finished with the citation block. Otherwise, download the updated citation.properties file and place it in the [`dataverse.lang.directory`](https://guides.dataverse.org/en/5.13/installation/config.html#configuring-the-lang-directory). + +- `wget https://github.com/IQSS/dataverse/releases/download/v5.13/citation.properties` +- `cp citation.properties /home/dataverse/langBundles` + +7\. Replace Solr schema.xml to allow multiple production locations and support for geospatial indexing to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). + +Note: with this release support for indexing of the experimental workflow metadata block has been removed from the standard schema.xml. +If you are using the workflow metadata block be sure to follow the instructions in step 7b below to maintain support for indexing workflow metadata. + +7a\. 
For installations without custom or experimental metadata blocks: + +- Stop Solr instance (usually service solr stop, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.13/installation/prerequisites.html#solr-init-script)) + +- Replace schema.xml + + - `cp /tmp/dvinstall/schema.xml /usr/local/solr/solr-8.11.1/server/solr/collection1/conf` + +- Start Solr instance (usually service solr start, depending on Solr/OS) + +7b\. For installations with custom or experimental metadata blocks: + +- Stop Solr instance (usually service solr stop, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.13/installation/prerequisites.html#solr-init-script)) + +- Edit the following line in your schema.xml (to indicate that productionPlace is now multiValued="true"): + + `` + +- Add the following lines to your schema.xml to add support for geospatial indexing: + + `` + `` + `` + `` + `` + `` + `` + +- Restart Solr instance (usually service solr start, depending on Solr/OS) + +### Optional Upgrade Step: Reindex Linked Dataverse Collections + +Datasets that are part of linked dataverse collections will now be displayed in +their linking dataverse collections. In order to fix the display of collections +that have already been linked you must re-index the linked collections. This +query will provide a list of commands to re-index the affected collections: + +``` +select 'curl http://localhost:8080/api/admin/index/dataverses/' +|| tmp.dvid from (select distinct dataverse_id as dvid +from dataverselinkingdataverse) as tmp +``` + +The result of the query will be a list of re-index commands such as: + +`curl http://localhost:8080/api/admin/index/dataverses/633` + +where '633' is the id of the linked collection. 
+ +### Optional Upgrade Step: Run File Detection on .eln Files + +Now that .eln files are recognized, you can run the [Redetect File Type](https://guides.dataverse.org/en/5.13/api/native-api.html#redetect-file-type) API on them to switch them from "unknown" to "ELN Archive". Afterward, you can reindex these files to make them appear in search facets. diff --git a/doc/sphinx-guides/SphinxRSTCheatSheet.md b/doc/sphinx-guides/SphinxRSTCheatSheet.md index 1ccd293080c..300260cb5b1 100755 --- a/doc/sphinx-guides/SphinxRSTCheatSheet.md +++ b/doc/sphinx-guides/SphinxRSTCheatSheet.md @@ -10,7 +10,7 @@ RST Cheat Sheet for Sphinx v 1.2.2 | Bold text | **text** | | | Italics/emphasis | *text* | | | literal | ``literal`` | | -| Internal cross-reference link | See section 5.3.1 of Sphinx documentationand example below | See section 5.3.1 of Sphinx documentationand example below | +| Internal cross-reference link | See section 5.3.1 of Sphinx documentation and example below | See section 5.3.1 of Sphinx documentation and example below | | code block | .. code-block:: guess | Allows for code blocks to be displayed properly | For more cheats please visit the [RST cheat sheet google doc] (https://docs.google.com/document/d/105H3iwPwgnPqwuMJI7q-h6FLtXV_EUCiwq2P13lADgA/edit?usp=sharing) \ No newline at end of file diff --git a/doc/sphinx-guides/requirements.txt b/doc/sphinx-guides/requirements.txt index 4488c54cd5e..028f07d11cb 100755 --- a/doc/sphinx-guides/requirements.txt +++ b/doc/sphinx-guides/requirements.txt @@ -1,5 +1,10 @@ -# current version as of this writing -Sphinx==3.5.4 +# Developers, please use Python 3.9 or lower to build the guides. +# For your convenience, a solution for Python 3.10 is provided below +# but we would prefer that you use the same version of Sphinx +# (below on the < 3.10 line) that is used to build the production guides. 
+Sphinx==3.5.4 ; python_version < '3.10' +Sphinx==5.3.0 ; python_version >= '3.10' + # Necessary workaround for ReadTheDocs for Sphinx 3.x - unnecessary as of Sphinx 4.5+ Jinja2>=3.0.2,<3.1 diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 61db5dfed93..b07ea8c4fd1 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -1,5 +1,6 @@ Tool Type Scope Description Data Explorer explore file A GUI which lists the variables in a tabular data file allowing searching, charting and cross tabulation analysis. See the README.md file at https://github.com/scholarsportal/dataverse-data-explorer-v2 for the instructions on adding Data Explorer to your Dataverse. Whole Tale explore dataset A platform for the creation of reproducible research packages that allows users to launch containerized interactive analysis environments based on popular tools such as Jupyter and RStudio. Using this integration, Dataverse users can launch Jupyter and RStudio environments to analyze published datasets. For more information, see the `Whole Tale User Guide `_. -File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, and GeoJSON - allowing them to be viewed without downloading. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. 
https://github.com/gdcc/dataverse-previewers +Binder explore dataset Binder allows you to spin up custom computing environments in the cloud (including Jupyter notebooks) with the files from your dataset. `Installation instructions `_ are in the Data Exploration Lab girder_ythub project. +File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers Data Curation Tool configure file A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions. 
diff --git a/doc/sphinx-guides/source/_static/api/add-license.json b/doc/sphinx-guides/source/_static/api/add-license.json index 969d6d58dab..a9d5dd34093 100644 --- a/doc/sphinx-guides/source/_static/api/add-license.json +++ b/doc/sphinx-guides/source/_static/api/add-license.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by/4.0", "shortDescription": "Creative Commons Attribution 4.0 International License.", "iconUrl": "https://i.creativecommons.org/l/by/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 2 } diff --git a/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json b/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json index ea0922dadc8..c81c5b32aab 100644 --- a/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json +++ b/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json @@ -2,7 +2,7 @@ "typeName": "subject", "value": ["Astronomy and Astrophysics", "Agricultural Sciences", -"Arts and Humanities", "Physics"] +"Arts and Humanities", "Physics", "Mathematical Sciences"] } diff --git a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml index 05eaadc3458..679f82a3d8a 100644 --- a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml +++ b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml @@ -34,7 +34,8 @@ LastProducer1, FirstProducer1 LastProducer2, FirstProducer2 1003-01-01 - ProductionPlace + ProductionPlace One + ProductionPlace Two SoftwareName1 SoftwareName2 GrantInformationGrantNumber1 @@ -88,12 +89,12 @@ 10 20 - 30 - 40 + 40 + 30 - 80 - 70 + 70 + 80 60 50 diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/shibboleth/shibboleth2.xml b/doc/sphinx-guides/source/_static/installation/files/etc/shibboleth/shibboleth2.xml index 41bf4709ba9..3960d003ad2 100644 --- a/doc/sphinx-guides/source/_static/installation/files/etc/shibboleth/shibboleth2.xml +++ 
b/doc/sphinx-guides/source/_static/installation/files/etc/shibboleth/shibboleth2.xml @@ -18,7 +18,7 @@ https://wiki.shibboleth.net/confluence/display/SHIB2/NativeSPConfiguration - + SAML2 SAML1 diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json new file mode 100644 index 00000000000..b188520dabb --- /dev/null +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json @@ -0,0 +1,26 @@ +{ + "displayName": "AuxFileViewer", + "description": "Show an auxiliary file from a dataset file.", + "toolName": "auxPreviewer", + "scope": "file", + "types": [ + "preview" + ], + "toolUrl": "https://example.com/AuxFileViewer.html", + "toolParameters": { + "queryParameters": [ + { + "fileid": "{fileId}" + } + ] + }, + "requirements": { + "auxFilesExist": [ + { + "formatTag": "myFormatTag", + "formatVersion": "0.1" + } + ] + }, + "contentType": "application/foobar" +} diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json index e30c067a86b..47413c8a625 100644 --- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json @@ -12,8 +12,16 @@ "PID": "{datasetPid}" }, { - "apiToken": "{apiToken}" + "locale":"{localeCode}" } - ] + ], + "allowedApiCalls": [ + { + "name":"retrieveDatasetJson", + "httpMethod":"GET", + "urlTemplate":"/api/v1/datasets/{datasetId}", + "timeOut":10 + } + ] } } diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json index 14f71a280b3..1c132576099 100644 --- 
a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json @@ -1,6 +1,6 @@ { "displayName": "Fabulous File Tool", - "description": "Fabulous Fun for Files!", + "description": "A non-existent tool that is fabulous fun for files!", "toolName": "fabulous", "scope": "file", "types": [ @@ -9,13 +9,25 @@ ], "toolUrl": "https://fabulousfiletool.com", "contentType": "text/tab-separated-values", + "httpMethod":"GET", "toolParameters": { "queryParameters": [ { "fileid": "{fileId}" }, { - "key": "{apiToken}" + "datasetPid": "{datasetPid}" + }, + { + "locale":"{localeCode}" + } + ], + "allowedApiCalls": [ + { + "name":"retrieveDataFile", + "httpMethod":"GET", + "urlTemplate":"/api/v1/access/datafile/{fileId}", + "timeOut":270 } ] } diff --git a/doc/sphinx-guides/source/admin/harvestclients.rst b/doc/sphinx-guides/source/admin/harvestclients.rst index c655d5af763..02783e4b97a 100644 --- a/doc/sphinx-guides/source/admin/harvestclients.rst +++ b/doc/sphinx-guides/source/admin/harvestclients.rst @@ -21,6 +21,23 @@ Clients are managed on the "Harvesting Clients" page accessible via the :doc:`da The process of creating a new, or editing an existing client, is largely self-explanatory. It is split into logical steps, in a way that allows the user to go back and correct the entries made earlier. The process is interactive and guidance text is provided. For example, the user is required to enter the URL of the remote OAI server. When they click *Next*, the application will try to establish a connection to the server in order to verify that it is working, and to obtain the information about the sets of metadata records and the metadata formats it supports. The choices offered to the user on the next page will be based on this extra information. 
If the application fails to establish a connection to the remote archive at the address specified, or if an invalid response is received, the user is given an opportunity to check and correct the URL they entered. +Note that as of 5.13, a new entry "Custom HTTP Header" has been added to Step 1 of the Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is, however, on the very first (Step 1) screen in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in Step 2 of creating or editing a client. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. + +How to Stop a Harvesting Run in Progress +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some harvesting jobs, especially the initial full harvest of a very large set - such as the default set of public datasets at IQSS - can take many hours. In case it is necessary to terminate such a long-running job, the following mechanism is provided (note that it is only available to a sysadmin with shell access to the application server): Create an empty file in the domain logs directory with the following name: ``stopharvest_<name>.<pid>``, where ``<name>`` is the nickname of the harvesting client and ``<pid>`` is the process id of the Application Server (Payara). This flag file needs to be owned by the same user that's running Payara, so that the application can remove it after stopping the job in progress. + +For example: + +.. 
code-block:: bash + + sudo touch /usr/local/payara5/glassfish/domains/domain1/logs/stopharvest_bigarchive.70916 + sudo chown dataverse /usr/local/payara5/glassfish/domains/domain1/logs/stopharvest_bigarchive.70916 + +Note: If the application server is stopped and restarted, any running harvesting jobs will be killed but may remain marked as in progress in the database. We thus recommend using the mechanism here to stop ongoing harvests prior to a server restart. + + What if a Run Fails? ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index b29e51b581d..1888fd89761 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -116,6 +116,8 @@ Binder Researchers can launch Jupyter Notebooks, RStudio, and other computational environments by entering the DOI of a dataset in a Dataverse installation on https://mybinder.org +A Binder button can also be added to every dataset page to launch Binder from there. See :doc:`external-tools`. + Institutions can self host BinderHub. The Dataverse Project is one of the supported `repository providers `_. Renku diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 5f7cf85f714..9fb8626d4c4 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -386,12 +386,16 @@ Metadata Block Setup Now that you understand the TSV format used for metadata blocks, the next step is to attempt to make improvements to existing metadata blocks or create entirely new metadata blocks. For either task, you should have a Dataverse Software development environment set up for testing where you can drop the database frequently while you make edits to TSV files. 
Once you have tested your TSV files, you should consider making a pull request to contribute your improvement back to the community. +.. _exploring-metadata-blocks: + Exploring Metadata Blocks ~~~~~~~~~~~~~~~~~~~~~~~~~ -In addition to studying the TSV files themselves you might find the following highly experimental and subject-to-change API endpoints useful to understand the metadata blocks that have already been loaded into your Dataverse installation: +In addition to studying the TSV files themselves you will probably find the :ref:`metadata-blocks-api` API helpful in getting a structured dump of metadata blocks in JSON format. + +There are also a few older, highly experimental, and subject-to-change API endpoints under the "admin" API documented below but the public API above is preferred. -You can get a dump of metadata fields (yes, the output is odd, please open a issue) like this: +You can get a dump of metadata fields like this: ``curl http://localhost:8080/api/admin/datasetfield`` diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index 78b8c8ce223..200c3a3e342 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -57,3 +57,13 @@ Downloading Metadata via API ---------------------------- The :doc:`/api/native-api` section of the API Guide explains how end users can download the metadata formats above via API. + +Exporter Configuration +---------------------- + +Two exporters - Schema.org JSONLD and OpenAire - use an algorithm to determine whether an author, or contact, name belongs to a person or organization. While the algorithm works well, there are cases in which it makes mistakes, usually inferring that an organization is a person. + +The Dataverse software implements two jvm-options that can be used to tune the algorithm: + +- :ref:`dataverse.personOrOrg.assumeCommaInPersonName` - boolean, default false. 
If true, Dataverse will assume any name without a comma must be an organization. This may be most useful for curated Dataverse instances that enforce the "family name, given name" convention. +- :ref:`dataverse.personOrOrg.orgPhraseArray` - a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization. diff --git a/doc/sphinx-guides/source/api/external-tools.rst b/doc/sphinx-guides/source/api/external-tools.rst index d72a6f62004..eec9944338f 100644 --- a/doc/sphinx-guides/source/api/external-tools.rst +++ b/doc/sphinx-guides/source/api/external-tools.rst @@ -53,15 +53,21 @@ External tools must be expressed in an external tool manifest file, a specific J Examples of Manifests +++++++++++++++++++++ -Let's look at two examples of external tool manifests (one at the file level and one at the dataset level) before we dive into how they work. +Let's look at a few examples of external tool manifests (both at the file level and at the dataset level) before we dive into how they work. + +.. _tools-for-files: External Tools for Files ^^^^^^^^^^^^^^^^^^^^^^^^ -:download:`fabulousFileTool.json <../_static/installation/files/root/external-tools/fabulousFileTool.json>` is a file level both an "explore" tool and a "preview" tool that operates on tabular files: +:download:`fabulousFileTool.json <../_static/installation/files/root/external-tools/fabulousFileTool.json>` is a file level (both an "explore" tool and a "preview" tool) that operates on tabular files: .. literalinclude:: ../_static/installation/files/root/external-tools/fabulousFileTool.json +:download:`auxFileTool.json <../_static/installation/files/root/external-tools/auxFileTool.json>` is a file level preview tool that operates on auxiliary files associated with a data file (note the "requirements" section): + +.. 
literalinclude:: ../_static/installation/files/root/external-tools/auxFileTool.json + External Tools for Datasets ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -92,7 +98,9 @@ Terminology contentType File level tools operate on a specific **file type** (content type or MIME type such as "application/pdf") and this must be specified. Dataset level tools do not use contentType. - toolParameters **Query parameters** are supported and described below. + toolParameters **httpMethod**, **queryParameters**, and **allowedApiCalls** are supported and described below. + + httpMethod Either ``GET`` or ``POST``. queryParameters **Key/value combinations** that can be appended to the toolUrl. For example, once substitution takes place (described below) the user may be redirected to ``https://fabulousfiletool.com?fileId=42&siteUrl=http://demo.dataverse.org``. @@ -102,6 +110,20 @@ Terminology reserved words A **set of strings surrounded by curly braces** such as ``{fileId}`` or ``{datasetId}`` that will be inserted into query parameters. See the table below for a complete list. + allowedApiCalls An array of objects defining callbacks the tool is allowed to make to the Dataverse API. If the dataset or file being accessed is not public, the callback URLs will be signed to allow the tool access for a defined time. + + allowedApiCalls name A name the tool will use to identify this callback URL such as ``retrieveDataFile``. + + allowedApiCalls urlTemplate The relative URL for the callback using reserved words to indicate where values should be dynamically substituted such as ``/api/v1/datasets/{datasetId}``. + + allowedApiCalls httpMethod Which HTTP method the specified callback uses such as ``GET`` or ``POST``. + + allowedApiCalls timeOut For non-public datasets and datafiles, how many minutes the signed URLs given to the tool should be valid for. Must be an integer. 
+ + requirements **Resources your tool needs to function.** For now, the only requirement you can specify is that one or more auxiliary files exist (see auxFilesExist in the :ref:`tools-for-files` example). Currently, requirements only apply to preview tools. If the requirements are not met, the preview tool is not shown. + + auxFilesExist **An array containing formatTag and formatVersion pairs** for each auxiliary file that your tool needs to download to function properly. For example, a required aux file could have a ``formatTag`` of "NcML" and a ``formatVersion`` of "1.0". See also :doc:`/developers/aux-file-support`. + toolName A **name** of an external tool that is used to differentiate between external tools and also used in bundle.properties for localization in the Dataverse installation web interface. For example, the toolName for Data Explorer is ``explorer``. For the Data Curation Tool the toolName is ``dct``. This is an optional parameter in the manifest JSON file. =========================== ========== @@ -131,6 +153,25 @@ Reserved Words ``{localeCode}`` optional The code for the language ("en" for English, "fr" for French, etc.) that user has selected from the language toggle in a Dataverse installation. See also :ref:`i18n`. =========================== ========== =========== +.. _api-exttools-auth: + +Authorization Options ++++++++++++++++++++++ + +When called for datasets or data files that are not public (i.e. in a draft dataset or for a restricted file), external tools are allowed access via the user's credentials. This is accomplished by one of two mechanisms: + +* Signed URLs (more secure, recommended) + + - Configured via the ``allowedApiCalls`` section of the manifest. The tool will be provided with signed URLs allowing the specified access to the given dataset or datafile for the specified amount of time. 
The tool will not be able to access any other datasets or files the user may have access to and will not be able to make calls other than those specified. + - For tools invoked via a GET call, Dataverse will include a callback query parameter with a Base64 encoded value. The decoded value is a signed URL that can be called to retrieve a JSON response containing all of the queryParameters and allowedApiCalls specified in the manifest. + - For tools invoked via POST, Dataverse will send a JSON body including the requested queryParameters and allowedApiCalls. Dataverse expects the response to the POST to indicate a redirect which Dataverse will use to open the tool. + +* API Token (deprecated, less secure, not recommended) + + - Configured via the ``queryParameters`` by including an ``{apiToken}`` value. When this is present Dataverse will send the user's apiToken to the tool. With the user's API token, the tool can perform any action via the Dataverse API that the user could. External tools configured via this method should be assessed for their trustworthiness. + - For tools invoked via GET, this will be done via a query parameter in the request URL which could be cached in the browser's history. + - For tools invoked via POST, Dataverse will send a JSON body including the apiToken. Dataverse expects the response to the POST to indicate a redirect which Dataverse will use to open the tool. + Internationalization of Your External Tool ++++++++++++++++++++++++++++++++++++++++++ diff --git a/doc/sphinx-guides/source/api/metrics.rst b/doc/sphinx-guides/source/api/metrics.rst index 6a878d73a98..f1eb1f88c71 100755 --- a/doc/sphinx-guides/source/api/metrics.rst +++ b/doc/sphinx-guides/source/api/metrics.rst @@ -72,7 +72,7 @@ Return Formats There are a number of API calls that provide time series, information reported per item (e.g. per dataset, per file, by subject, by category, and by file Mimetype), or both (time series per item). 
Because these calls all report more than a single number, the API provides two optional formats for the return that can be selected by specifying an HTTP Accept Header for the desired format: -* application/json - a JSON array of objects. For time-series, the objects include key/values for the ``date`` and ``count`` for that month. For per-item calls, the objects include the item (e.g. for a subject), or it's id/pid (for a dataset or datafile). For timeseries per-item, the objects also include a date. In all cases, the response is a single array. +* application/json - a JSON array of objects. For time-series, the objects include key/values for the ``date`` and ``count`` for that month. For per-item calls, the objects include the item (e.g. for a subject), or its id/pid (for a dataset or datafile (which may or may not have a PID)). For timeseries per-item, the objects also include a date. In all cases, the response is a single array. * Example: ``curl -H 'Accept:application/json' https://demo.dataverse.org/api/info/metrics/downloads/monthly`` @@ -120,7 +120,7 @@ Example: ``curl https://demo.dataverse.org/api/info/metrics/makeDataCount/viewsT Endpoint Table -------------- -The following table lists the available metrics endpoints (not including the Make Data Counts endpoints a single dataset which are part of the :doc:`/api/native-api`) along with additional notes about them. +The following table lists the available metrics endpoints (not including the Make Data Counts endpoints for a single dataset which are part of the :doc:`/api/native-api`) along with additional notes about them. .. 
csv-table:: Metrics Endpoints diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 6d68d648cb3..3cd469e3883 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -526,7 +526,7 @@ To create a dataset, you must supply a JSON file that contains at least the foll - Description Text - Subject -As a starting point, you can download :download:`dataset-finch1.json <../../../../scripts/search/tests/data/dataset-finch1.json>` and modify it to meet your needs. (:download:`dataset-create-new-all-default-fields.json <../../../../scripts/api/data/dataset-finch1_fr.json>` is a variant of this file that includes setting the metadata language (see :ref:`:MetadataLanguages`) to French (fr). In addition to this minimal example, you can download :download:`dataset-create-new-all-default-fields.json <../../../../scripts/api/data/dataset-create-new-all-default-fields.json>` which populates all of the metadata fields that ship with a Dataverse installation.) +As a starting point, you can download :download:`dataset-finch1.json <../../../../scripts/search/tests/data/dataset-finch1.json>` and modify it to meet your needs. (:download:`dataset-finch1_fr.json <../../../../scripts/api/data/dataset-finch1_fr.json>` is a variant of this file that includes setting the metadata language (see :ref:`:MetadataLanguages`) to French (fr). In addition to this minimal example, you can download :download:`dataset-create-new-all-default-fields.json <../../../../scripts/api/data/dataset-create-new-all-default-fields.json>` which populates all of the metadata fields that ship with a Dataverse installation.) The curl command below assumes you have kept the name "dataset-finch1.json" and that this file is in your current working directory. @@ -552,6 +552,8 @@ You should expect an HTTP 200 ("OK") response and JSON indicating the database I .. 
note:: Only a Dataverse installation account with superuser permissions is allowed to include files when creating a dataset via this API. Adding files this way only adds their file metadata to the database, you will need to manually add the physical files to the file system. +.. _api-import-dataset: + Import a Dataset into a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -728,13 +730,12 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB - -|CORS| Show the dataset whose id is passed: +|CORS| Show the dataset whose database id is passed: .. code-block:: bash export SERVER_URL=https://demo.dataverse.org - export ID=408730 + export ID=24 curl $SERVER_URL/api/datasets/$ID @@ -742,7 +743,7 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl https://demo.dataverse.org/api/datasets/408730 + curl https://demo.dataverse.org/api/datasets/24 The dataset id can be extracted from the response retrieved from the API which uses the persistent identifier (``/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER``). @@ -1511,6 +1512,45 @@ The fully expanded example above (without environment variables) looks like this curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' +.. 
_cleanup-storage-api: + +Cleanup storage of a Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is an experimental feature and should be tested on your system before using it in production. +Also, make sure that your backups are up-to-date before using this on production servers. +It is advised to first call this method with the ``dryrun`` parameter set to ``true`` before actually deleting the files. +This will allow you to manually inspect the files that would be deleted if that parameter is set to ``false`` or is omitted (a list of the files that would be deleted is provided in the response). + +If your Dataverse installation has been configured to support direct uploads, or in some other situations, +you could end up with some files in the storage of a dataset that are not linked to that dataset directly. Most commonly, this could +happen when an upload fails in the middle of a transfer, i.e. if a user does a UI direct upload and leaves the page without hitting cancel or save, +Dataverse doesn't know and doesn't clean up the files. Similarly in the direct upload API, if the final /addFiles call isn't done, the files are abandoned. + +All the files stored in the Dataset storage location that are not in the file list of that Dataset (and follow the naming pattern of the dataset files) can be removed, as shown in the example below. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/J8SJZB + export DRYRUN=true + + curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/:persistentId/cleanStorage?persistentId=$PERSISTENT_ID&dryrun=$DRYRUN" + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X GET "https://demo.dataverse.org/api/datasets/:persistentId/cleanStorage?persistentId=doi:10.5072/FK2/J8SJZB&dryrun=true" + +Adding Files To a Dataset via Other Tools +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In some circumstances, it may be useful to move or copy files into Dataverse's storage manually or via external tools and then add them to a dataset (i.e. without involving Dataverse in the file transfer itself). +Two API calls are available for this use case to add files to a dataset or to replace files that were already in the dataset. +These calls were developed as part of Dataverse's direct upload mechanism and are detailed in :doc:`/developers/s3-direct-upload-api`. + Report the data (file) size of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2029,10 +2069,99 @@ Archiving is an optional feature that may be configured for a Dataverse installa curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/:persistentId/$VERSION/archivalStatus?persistentId=$PERSISTENT_IDENTIFIER" +Get External Tool Parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API call is intended as a callback that can be used by :doc:`/installation/external-tools` to retrieve signed URLs necessary for their interaction with Dataverse. +It can be called directly as well. + +The response is a JSON object described in the :doc:`/api/external-tools` section of the API guide. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export VERSION=1.0 + export TOOL_ID=1 + + + curl -H "X-Dataverse-key: $API_TOKEN" -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/toolparams/$TOOL_ID?persistentId=$PERSISTENT_IDENTIFIER" Files ----- +Get JSON Representation of a File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
note:: Files can be accessed using persistent identifiers. This is done by passing the constant ``:persistentId`` where the numeric id of the file is expected, and then passing the actual persistent id as a query parameter with the name ``persistentId``. + +Example: Getting the file whose DOI is *10.5072/FK2/J8SJZB*: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + +You may get the draft version of an unpublished file if you pass an API token with view draft permissions: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + + +|CORS| Show the file whose id is passed: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export ID=408730 + + curl $SERVER_URL/api/files/$ID + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl https://demo.dataverse.org/api/files/408730 + +You may get the draft version of a published file if you pass an API token with view draft permissions and use the draft path parameter: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/:persistentId/draft/?persistentId=$PERSISTENT_IDENTIFIER + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/:persistentId/draft/?persistentId=doi:10.5072/FK2/J8SJZB + +The file id can be extracted from the response retrieved from the API which uses the persistent identifier (``/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER``). + Adding Files ~~~~~~~~~~~~ @@ -2230,6 +2359,47 @@ Currently the following methods are used to detect file types: - The file extension (e.g. ".ipybn") is used, defined in a file called ``MimeTypeDetectionByFileExtension.properties``. - The file name (e.g. "Dockerfile") is used, defined in a file called ``MimeTypeDetectionByFileName.properties``. +.. _extractNcml: + +Extract NcML +~~~~~~~~~~~~ + +As explained in the :ref:`netcdf-and-hdf5` section of the User Guide, when those file types are uploaded, an attempt is made to extract an NcML file from them and store it as an auxiliary file. + +This happens automatically but superusers can also manually trigger this NcML extraction process with the API endpoint below. + +Note that "true" will be returned if an NcML file was created. "false" will be returned if there was an error or if the NcML file already exists (check server.log for details). + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/extractNcml" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/extractNcml" + +A curl example using a PID: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/extractNcml?persistentId=$PERSISTENT_ID" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/extractNcml?persistentId=doi:10.5072/FK2/AAA000" + Replacing Files ~~~~~~~~~~~~~~~ @@ -2348,48 +2518,6 @@ The fully expanded example above (without environment variables) looks like this Note: The ``id`` returned in the json response is the id of the file metadata version. - -Adding File Metadata -~~~~~~~~~~~~~~~~~~~~ - -This API call requires a ``jsonString`` expressing the metadata of multiple files. It adds file metadata to the database table where the file has already been copied to the storage. 
- -The jsonData object includes values for: - -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: - - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings - -.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below. - -A curl example using an ``PERSISTENT_ID`` - -* ``SERVER_URL`` - e.g. https://demo.dataverse.org -* ``API_TOKEN`` - API endpoints require an API token that can be passed as the X-Dataverse-key HTTP header. For more details, see the :doc:`auth` section. -* ``PERSISTENT_IDENTIFIER`` - Example: ``doi:10.5072/FK2/7U7YBV`` - -.. code-block:: bash - - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ - {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" - -The fully expanded example above (without environment variables) looks like this: - -.. 
code-block:: bash - - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/:persistentId/addFiles?persistentId=doi:10.5072/FK2/7U7YBV -F jsonData='[{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}, {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' - Updating File Metadata ~~~~~~~~~~~~~~~~~~~~~~ @@ -2689,6 +2817,24 @@ Note the optional "limit" parameter. Without it, the API will attempt to populat By default, the admin API calls are blocked and can only be called from localhost. See more details in :ref:`:BlockedApiEndpoints <:BlockedApiEndpoints>` and :ref:`:BlockedApiPolicy <:BlockedApiPolicy>` settings in :doc:`/installation/config`. +Get External Tool Parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API call is intended as a callback that can be used by :doc:`/installation/external-tools` to retrieve signed Urls necessary for their interaction with Dataverse. +It can be called directly as well. (Note that the required FILEMETADATA_ID is the "id" returned in the JSON response from the /api/files/$FILE_ID/metadata call.) + +The response is a JSON object described in the :doc:`/api/external-tools` section of the API guide. + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export FILE_ID=3 + export FILEMETADATA_ID=1 + export TOOL_ID=1 + + curl -H "X-Dataverse-key: $API_TOKEN" -H "Accept:application/json" "$SERVER_URL/api/files/$FILE_ID/metadata/$FILEMETADATA_ID/toolparams/$TOOL_ID" + Users Token Management ---------------------- @@ -2971,22 +3117,47 @@ The fully expanded example above (without environment variables) looks like this curl https://demo.dataverse.org/api/info/apiTermsOfUse +.. _metadata-blocks-api: + Metadata Blocks --------------- +See also :ref:`exploring-metadata-blocks`. + Show Info About All Metadata Blocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|CORS| Lists brief info about all metadata blocks registered in the system:: +|CORS| Lists brief info about all metadata blocks registered in the system. + +.. code-block:: bash - GET http://$SERVER/api/metadatablocks + export SERVER_URL=https://demo.dataverse.org + + curl $SERVER_URL/api/metadatablocks + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl https://demo.dataverse.org/api/metadatablocks Show Info About Single Metadata Block ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|CORS| Return data about the block whose ``identifier`` is passed. ``identifier`` can either be the block's id, or its name:: +|CORS| Return data about the block whose ``identifier`` is passed, including allowed controlled vocabulary values. ``identifier`` can either be the block's database id, or its name (e.g. "citation"). + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export IDENTIFIER=citation + + curl $SERVER_URL/api/metadatablocks/$IDENTIFIER + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash - GET http://$SERVER/api/metadatablocks/$identifier + curl https://demo.dataverse.org/api/metadatablocks/citation .. 
_Notifications: @@ -3200,6 +3371,149 @@ The fully expanded example above (without the environment variables) looks like Only users with superuser permissions may delete harvesting sets. +Managing Harvesting Clients +--------------------------- + +The following API can be used to create and manage "Harvesting Clients". A Harvesting Client is a configuration entry that allows your Dataverse installation to harvest and index metadata from a specific remote location, either regularly, on a configured schedule, or on a one-off basis. For more information, see the :doc:`/admin/harvestclients` section of the Admin Guide. + +List All Configured Harvesting Clients +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Shows all the Harvesting Clients configured:: + + GET http://$SERVER/api/harvest/clients/ + +Show a Specific Harvesting Client +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Shows a Harvesting Client with a defined nickname:: + + GET http://$SERVER/api/harvest/clients/$nickname + +.. code-block:: bash + + curl "http://localhost:8080/api/harvest/clients/myclient" + + { + "status":"OK", + "data": + { + "lastDatasetsFailed": "22", + "lastDatasetsDeleted": "0", + "metadataFormat": "oai_dc", + "archiveDescription": "This Dataset is harvested from our partners. 
Clicking the link will take you directly to the archival source of the data.", + "archiveUrl": "https://dataverse.foo.edu", + "harvestUrl": "https://dataverse.foo.edu/oai", + "style": "dataverse", + "type": "oai", + "dataverseAlias": "fooData", + "nickName": "myClient", + "set": "fooSet", + "schedule": "none", + "status": "inActive", + "lastHarvest": "Thu Oct 13 14:48:57 EDT 2022", + "lastResult": "SUCCESS", + "lastSuccessful": "Thu Oct 13 14:48:57 EDT 2022", + "lastNonEmpty": "Thu Oct 13 14:48:57 EDT 2022", + "lastDatasetsHarvested": "137" + } + } + + +Create a Harvesting Client +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To create a new harvesting client:: + + POST http://$SERVER/api/harvest/clients/$nickname + +``nickName`` is the name identifying the new client. It should be alpha-numeric and may also contain -, _, or %, but no spaces. Must also be unique in the installation. + +You must supply a JSON file that describes the configuration, similarly to the output of the GET API above. The following fields are mandatory: + +- dataverseAlias: The alias of an existing collection where harvested datasets will be deposited +- harvestUrl: The URL of the remote OAI archive +- archiveUrl: The URL of the remote archive that will be used in the redirect links pointing back to the archival locations of the harvested records. It may or may not be on the same server as the harvestUrl above. If this OAI archive is another Dataverse installation, it will be the same URL as harvestUrl minus the "/oai". For example: https://demo.dataverse.org/ vs. https://demo.dataverse.org/oai +- metadataFormat: A supported metadata format. As of writing this the supported formats are "oai_dc", "oai_ddi" and "dataverse_json". + +The following optional fields are supported: + +- archiveDescription: What the name suggests. If not supplied, will default to "This Dataset is harvested from our partners. Clicking the link will take you directly to the archival source of the data." 
+- set: The OAI set on the remote server. If not supplied, will default to none, i.e., "harvest everything". +- style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). +- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. + +Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API. + +An example JSON file would look like this:: + + { + "nickName": "zenodo", + "dataverseAlias": "zenodoHarvested", + "harvestUrl": "https://zenodo.org/oai2d", + "archiveUrl": "https://zenodo.org", + "archiveDescription": "Moissonné depuis la collection LMOPS de l'entrepôt Zenodo. En cliquant sur ce jeu de données, vous serez redirigé vers Zenodo.", + "metadataFormat": "oai_dc", + "customHeaders": "x-oai-api-key: xxxyyyzzz", + "set": "user-lmops" + } + +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=http://localhost:8080 + + curl -H X-Dataverse-key:$API_TOKEN -X POST -H "Content-Type: application/json" "$SERVER_URL/api/harvest/clients/zenodo" --upload-file client.json + +The fully expanded example above (without the environment variables) looks like this: + +.. 
code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST -H "Content-Type: application/json" "http://localhost:8080/api/harvest/clients/zenodo" --upload-file "client.json" + + { + "status": "OK", + "data": { + "metadataFormat": "oai_dc", + "archiveDescription": "Moissonné depuis la collection LMOPS de l'entrepôt Zenodo. En cliquant sur ce jeu de données, vous serez redirigé vers Zenodo.", + "archiveUrl": "https://zenodo.org", + "harvestUrl": "https://zenodo.org/oai2d", + "style": "default", + "type": "oai", + "dataverseAlias": "zenodoHarvested", + "nickName": "zenodo", + "set": "user-lmops", + "schedule": "none", + "status": "inActive", + "lastHarvest": "N/A", + "lastSuccessful": "N/A", + "lastNonEmpty": "N/A", + "lastDatasetsHarvested": "N/A", + "lastDatasetsDeleted": "N/A" + } + } + +Only users with superuser permissions may create or configure harvesting clients. + +Modify a Harvesting Client +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Similar to the API above, using the same JSON format, but run on an existing client and using the PUT method instead of POST. + +Delete a Harvesting Client +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Self-explanatory: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "http://localhost:8080/api/harvest/clients/$nickName" + +Only users with superuser permissions may delete harvesting clients. + + PIDs ---- @@ -4015,7 +4329,7 @@ View the details of the standard license with the database ID specified in ``$ID curl $SERVER_URL/api/licenses/$ID -Superusers can add a new license by posting a JSON file adapted from this example :download:`add-license.json <../_static/api/add-license.json>`. The ``name`` and ``uri`` of the new license must be unique. 
If you are interested in adding a Creative Commons license, you are encouarged to use the JSON files under :ref:`adding-creative-commons-licenses`: +Superusers can add a new license by posting a JSON file adapted from this example :download:`add-license.json <../_static/api/add-license.json>`. The ``name`` and ``uri`` of the new license must be unique. The sort order field is mandatory. If you are interested in adding a Creative Commons license, you are encouraged to use the JSON files under :ref:`adding-creative-commons-licenses`: .. code-block:: bash @@ -4040,6 +4354,13 @@ Superusers can delete a license, provided it is not in use, by the license ``$ID .. code-block:: bash curl -X DELETE -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/$ID + +Superusers can change the sorting order of a license specified by the license ``$ID``: + +.. code-block:: bash + + export SORT_ORDER=100 + curl -X PUT -H 'Content-Type: application/json' -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/$ID/:sortOrder/$SORT_ORDER List Dataset Templates ~~~~~~~~~~~~~~~~~~~~~~ @@ -4070,6 +4391,33 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash curl -X DELETE https://demo.dataverse.org/api/admin/template/24 + +.. _api-native-signed-url: - - +Request Signed URL +~~~~~~~~~~~~~~~~~~ + +Dataverse has the ability to create signed URLs for its API calls. +A signature, which is valid only for the specific API call and only for a specified duration, allows the call to proceed with the authentication of the specified user. +It is intended as an alternative to the use of an API key (which is valid for a long time period and can be used with any API call). +Signed URLs were developed to support External Tools but may be useful in other scenarios where Dataverse or a third-party tool needs to delegate limited access to another user or tool. +This API call allows a Dataverse superuser to generate a signed URL for such scenarios. 
+The JSON input parameter required is an object with the following keys: + +- ``url`` - the exact URL to sign, including api version number and all query parameters +- ``timeOut`` - how long in minutes the signature should be valid for, default is 10 minutes +- ``httpMethod`` - which HTTP method is required, default is GET +- ``user`` - the user identifier for the account associated with this signature, the default is the superuser making the call. The API call will succeed/fail based on whether the specified user has the required permissions. + +A curl example allowing access to a dataset's metadata: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export API_KEY=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export JSON='{"url":"https://demo.dataverse.org/api/v1/datasets/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB","timeOut":5,"user":"alberteinstein"}' + + curl -H "X-Dataverse-key:$API_KEY" -H 'Content-Type:application/json' -d "$JSON" $SERVER_URL/api/admin/requestSignedUrl + +Please see :ref:`dataverse.api.signature-secret` for the configuration option to add a shared secret, enabling extra +security. diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index d5e56543fb1..b941064f173 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -35,6 +35,8 @@ show_relevance boolean Whether or not to show details of which fields were ma show_facets boolean Whether or not to show facets that can be operated on by the "fq" parameter. False by default. See :ref:`advanced search example `. fq string A filter query on the search term. Multiple "fq" parameters can be used. See :ref:`advanced search example `. show_entity_ids boolean Whether or not to show the database IDs of the search results (for developer use). +geo_point string Latitude and longitude in the form ``geo_point=42.3,-71.1``. You must supply ``geo_radius`` as well. 
See also :ref:`geospatial-search`. +geo_radius string Radial distance in kilometers from ``geo_point`` (which must be supplied as well) such as ``geo_radius=1.5``. metadata_fields string Includes the requested fields for each dataset in the response. Multiple "metadata_fields" parameters can be used to include several fields. The value must be in the form "{metadata_block_name}:{field_name}" to include a specific field from a metadata block (see :ref:`example `) or "{metadata_field_set_name}:\*" to include all the fields for a metadata block (see :ref:`example `). "{field_name}" cannot be a subfield of a compound field. If "{field_name}" is a compound field, all subfields are included. =============== ======= =========== diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index 590eee4bd9d..736d86cacf5 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -66,9 +66,9 @@ # built documents. # # The short X.Y version. -version = '5.12.1' +version = '5.13' # The full version, including alpha/beta/rc tags. -release = '5.12.1' +release = '5.13' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst new file mode 100644 index 00000000000..931c722f91b --- /dev/null +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -0,0 +1,354 @@ +Application Base Image +====================== + +.. contents:: |toctitle| + :local: + +A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc is all done +at this layer, to make the application image focus on the app itself. 
+ +**NOTE: The base image does not contain the Dataverse application itself.** + +Within the main repository, you may find the base image's files at ``/modules/container-base``. +This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**NOTE: This image is created, maintained and supported by the Dataverse community on a best-effort basis.** +IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. +You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) +efforts. + +Supported Image Tags +++++++++++++++++++++ + +This image is sourced from the main upstream code `repository of the Dataverse software `_. +Development and maintenance of the `image's code `_ +happens there (again, by the community). Community-supported image tags are based on the two most important +upstream branches: + +- The ``unstable`` tag corresponds to the ``develop`` branch, where pull requests are merged. + (`Dockerfile `__) +- The ``stable`` tag corresponds to the ``master`` branch, where releases are cut from. + (`Dockerfile `__) + + + +Image Contents +++++++++++++++ + +The base image provides: + +- `Eclipse Temurin JRE using Java 11 `_ +- `Payara Community Application Server `_ +- CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) +- Linux tools for analysis, monitoring and so on +- `Jattach `__ (attach to running JVM) +- `wait-for `__ (tool to "wait for" a service to be available) +- `dumb-init `__ (see :ref:`below ` for details) + +This image is created as a "multi-arch image", see :ref:`below `. + +It inherits (is built on) an Ubuntu environment from the upstream +`base image of Eclipse Temurin `_. 
+You are free to change the JRE/JDK image to your liking (see below). + + + +Build Instructions +++++++++++++++++++ + +Assuming you have `Docker `_, `Docker Desktop `_, +`Moby `_ or some remote Docker host configured, up and running from here on. + +Simply execute the Maven module's packaging target with activated "container" profile. Either from the project's Git root: + +``mvn -Pct -f modules/container-base install`` + +Or move to the module and execute: + +``cd modules/container-base && mvn -Pct install`` + +Some additional notes, using Maven parameters to change the build and use ...: + +- | ... a different tag only: add ``-Dbase.image.tag=tag``. + | *Note:* default is ``develop`` +- | ... a different image name and tag: add ``-Dbase.image=name:tag``. + | *Note:* default is ``gdcc/base:${base.image.tag}`` +- ... a different image registry than Docker Hub: add ``-Ddocker.registry=registry.example.org`` (see also + `DMP docs on registries `__) +- ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. +- | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (e.g. ``11``, ``17``, ...). + | *Note:* must resolve to an available image tag ``A-jre`` of Eclipse Temurin! + (See also `Docker Hub search example `_) +- ... a different Java Distribution: add ``-Djava.image="name:tag"`` with precise reference to an + image available local or remote. +- ... a different UID/GID for the ``payara`` user/group: add ``-Dbase.image.uid=1234`` (or ``.gid``) + +Automated Builds & Publishing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To make reusing most simple, the image is built with a Github Action within the IQSS repository and then pushed +to `Docker Hub gdcc/base repository `_. It is built and pushed on every edit to +its sources plus uncached scheduled nightly builds to make sure security updates are finding their way in. 
+ +*Note:* For the Github Action to be able to push to Docker Hub, two repository secrets +(DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. + +.. _base-multiarch: + +Processor Architecture and Multiarch +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: +AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2), by using Maven Docker Plugin's *BuildX* mode. + +Building the image via ``mvn -Pct package`` or ``mvn -Pct install`` as above will only build for the architecture of +the Docker machine's CPU. + +Only ``mvn -Pct deploy`` will trigger building on all enabled architectures. +Yet, to enable building with non-native code on your build machine, you will need to set up a cross-platform builder. + +On Linux, you should install `qemu-user-static `__ (preferably via +your package management) on the host and run ``docker run --rm --privileged multiarch/qemu-user-static --reset -p yes`` +to enable that builder. The Docker plugin will set up everything else for you. + + + +Tunables +++++++++ + +The base image provides a Payara domain suited for production use, but can also be used during development. +Many settings have been carefully selected for best performance and stability of the Dataverse application. + +As with any service, you should always monitor any metrics and make use of the tuning capabilities the base image +provides. These are mostly based on environment variables (very common with containers) and provide sane defaults. + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 10 50 + :header-rows: 1 + + * - Env. variable + - Default + - Type + - Description + * - ``DEPLOY_PROPS`` + - (empty) + - String + - Set to add arguments to generated `asadmin deploy` commands. + * - ``PREBOOT_COMMANDS`` + - [preboot]_ + - Abs. 
path + - Provide path to file with ``asadmin`` commands to run **before** boot of application server. + See also `Pre/postboot script docs`_. + * - ``POSTBOOT_COMMANDS`` + - [postboot]_ + - Abs. path + - Provide path to file with ``asadmin`` commands to run **after** boot of application server. + See also `Pre/postboot script docs`_. + * - ``JVM_ARGS`` + - (empty) + - String + - Additional arguments to pass to application server's JVM on start. + * - ``MEM_MAX_RAM_PERCENTAGE`` + - ``70.0`` + - Percentage + - Maximum amount of container's allocated RAM to be used as heap space. + Make sure to leave some room for native memory, OS overhead etc! + * - ``MEM_XSS`` + - ``512k`` + - Size + - Tune the maximum JVM stack size. + * - ``MEM_MIN_HEAP_FREE_RATIO`` + - ``20`` + - Integer + - Make the heap shrink aggressively and grow conservatively. See also `run-java-sh recommendations`_. + * - ``MEM_MAX_HEAP_FREE_RATIO`` + - ``40`` + - Integer + - Make the heap shrink aggressively and grow conservatively. See also `run-java-sh recommendations`_. + * - ``MEM_MAX_GC_PAUSE_MILLIS`` + - ``500`` + - Milliseconds + - Shorter pause times might result in lots of collections causing overhead without much gain. + This needs monitoring and tuning. It's a complex matter. + * - ``MEM_METASPACE_SIZE`` + - ``256m`` + - Size + - Initial size of memory reserved for class metadata, also used as trigger to run a garbage collection + once passing this size. + * - ``MEM_MAX_METASPACE_SIZE`` + - ``2g`` + - Size + - The metaspace's size will not outgrow this limit. + * - ``ENABLE_DUMPS`` + - ``0`` + - Bool, ``0|1`` + - If enabled, the argument(s) given in ``JVM_DUMPS_ARG`` will be added to the JVM starting up. + This means it will enable dumping the heap to ``${DUMPS_DIR}`` (see below) in "out of memory" cases. + (You should back this location with disk space / ramdisk, so it does not write into an overlay filesystem!) 
+ * - ``JVM_DUMPS_ARG`` + - [dump-option]_ + - String + - Can be fine-tuned for more fine-grained control of dumping behaviour. + * - ``ENABLE_JMX`` + - ``0`` + - Bool, ``0|1`` + - Allow insecure JMX connections, enable AMX and tune all JMX monitoring levels to ``HIGH``. + See also `Payara Docs - Basic Monitoring `_. + A basic JMX service is enabled by default in Payara, exposing basic JVM MBeans, but especially no Payara MBeans. + * - ``ENABLE_JDWP`` + - ``0`` + - Bool, ``0|1`` + - Enable the "Java Debug Wire Protocol" to attach a remote debugger to the JVM in this container. + Listens on port 9009 when enabled. Search the internet for numerous tutorials to use it. + * - ``ENABLE_RELOAD`` + - ``0`` + - Bool, ``0|1`` + - Enable the dynamic "hot" reloads of files when changed in a deployment. Useful for development, + when new artifacts are copied into the running domain. + * - ``DATAVERSE_HTTP_TIMEOUT`` + - ``900`` + - Seconds + - See :ref:`:ApplicationServerSettings` ``http.request-timeout-seconds``. + + *Note:* can also be set using any other `MicroProfile Config Sources`_ available via ``dataverse.http.timeout``. + + +.. [preboot] ``${CONFIG_DIR}/pre-boot-commands.asadmin`` +.. [postboot] ``${CONFIG_DIR}/post-boot-commands.asadmin`` +.. [dump-option] ``-XX:+HeapDumpOnOutOfMemoryError`` + + + +Locations ++++++++++ + +These environment variables represent certain locations and might be reused in your scripts etc. +All of these variables aren't meant to be reconfigurable and reflect state in the filesystem layout! + +**Writeable at build time:** + +The overlay filesystem of Docker and other container technologies is not meant to be used for any performance IO. +You should avoid *writing* data anywhere in the file tree at runtime, except for well known locations with mounted +volumes backing them (see below). + +The locations below are meant to be written to when you build a container image, either this base or anything +building upon it. 
You can also use these for references in scripts, etc. + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Env. variable + - Value + - Description + * - ``HOME_DIR`` + - ``/opt/payara`` + - Home base to Payara and the application + * - ``PAYARA_DIR`` + - ``${HOME_DIR}/appserver`` + - Installation directory of Payara server + * - ``SCRIPT_DIR`` + - ``${HOME_DIR}/scripts`` + - Any scripts like the container entrypoint, init scripts, etc + * - ``CONFIG_DIR`` + - ``${HOME_DIR}/config`` + - Payara Server configurations like pre/postboot command files go here + (Might be reused for Dataverse one day) + * - ``DEPLOY_DIR`` + - ``${HOME_DIR}/deployments`` + - Any EAR or WAR file, exploded WAR directory etc are autodeployed on start + * - ``DOMAIN_DIR`` + - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}`` + - Path to root of the Payara domain applications will be deployed into. Usually ``${DOMAIN_NAME}`` will be ``domain1``. + + +**Writeable at runtime:** + +The locations below are defined as `Docker volumes `_ by the base image. +They will by default get backed by an "anonymous volume", but you can (and should) bind-mount a host directory or +named Docker volume in these places to avoid data loss, gain performance and/or use a network file system. + +**Notes:** +1. On Kubernetes you still need to provide volume definitions for these places in your deployment objects! +2. You should not write data into these locations at build time - it will be shadowed by the mounted volumes! + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Env. variable + - Value + - Description + * - ``STORAGE_DIR`` + - ``/dv`` + - This place is writeable by the Payara user, making it usable as a place to store research data, customizations + or other. Images inheriting the base image should create distinct folders here, backed by different + mounted volumes. 
+ * - ``SECRETS_DIR`` + - ``/secrets`` + - Mount secrets or other here, being picked up automatically by + `Directory Config Source `_. + See also various :doc:`../installation/config` options involving secrets. + * - ``DUMPS_DIR`` + - ``/dumps`` + - Default location where heap dumps will be stored (see above). + You should mount some storage here (disk or ephemeral). + + +Exposed Ports ++++++++++++++ + +The default ports that are exposed by this image are: + +- 8080 - HTTP listener +- 4848 - Admin Service HTTPS listener +- 8686 - JMX listener +- 9009 - "Java Debug Wire Protocol" port (when ``ENABLE_JDWP=1``) + +The HTTPS listener (on port 8181) becomes deactivated during the build, as we will always need to reverse-proxy the +application server and handle SSL/TLS termination at this point. Save the memory and some CPU cycles! + + + +.. _base-entrypoint: + +Entry & Extension Points +++++++++++++++++++++++++ + +The entrypoint shell script provided by this base image will by default ensure to: + +- Run any scripts named ``${SCRIPT_DIR}/init_*`` or in ``${SCRIPT_DIR}/init.d/*`` directory for initialization + **before** the application server starts. +- Run an executable script ``${SCRIPT_DIR}/startInBackground.sh`` in the background - if present. +- Run the application server startup scripting in foreground (``${SCRIPT_DIR}/startInForeground.sh``). + +If you need to create some scripting that runs in parallel under supervision of `dumb-init `_, +e.g. to wait for the application to deploy before executing something, this is your point of extension: simply provide +the ``${SCRIPT_DIR}/startInBackground.sh`` executable script with your application image. + + + +Other Hints ++++++++++++ + +By default, ``domain1`` is enabled to use the ``G1GC`` garbage collector. + +For running a Java application within a Linux based container, the support for CGroups is essential. It has been +included and activated by default since Java 8u192, Java 11 LTS and later. 
If you are interested in more details, +you can read about those in a few places like https://developers.redhat.com/articles/2022/04/19/java-17-whats-new-openjdks-container-awareness, +https://www.eclipse.org/openj9/docs/xxusecontainersupport, etc. The other memory defaults are inspired +from `run-java-sh recommendations`_. + + + +.. _Pre/postboot script docs: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Micro%20Documentation/Payara%20Micro%20Configuration%20and%20Management/Micro%20Management/Asadmin%20Commands/Pre%20and%20Post%20Boot%20Commands.html +.. _MicroProfile Config Sources: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html +.. _run-java-sh recommendations: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst new file mode 100644 index 00000000000..92ac94e2cf2 --- /dev/null +++ b/doc/sphinx-guides/source/container/index.rst @@ -0,0 +1,27 @@ +Container Guide +=============== + +Running the Dataverse software in containers is quite different than in a :doc:`standard installation <../installation/prep>`. + +Both approaches have pros and cons. These days, containers are very often used for development and testing, +but there is an ever rising move toward running applications in the cloud using container technology. + +**NOTE:** +**As the Institute for Quantitative Social Sciences (IQSS) at Harvard is running a standard, non-containerized installation, +container support described in this guide is mostly created and maintained by the Dataverse community on a best-effort +basis.** + +This guide is *not* about installation on technology like Docker Swarm, Kubernetes, Rancher or other +solutions to run containers in production. 
There is the `Dataverse on K8s project `_ for this +purpose, as mentioned in the :doc:`/developers/containers` section of the Developer Guide. + +This guide focuses on describing the container images managed from the main Dataverse repository (again: by the +community, not IQSS), their features and limitations. Instructions on how to build the images yourself and how to +develop and extend them further are provided. + +**Contents:** + +.. toctree:: + + base-image + diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 0782fd239a1..0a3dd23ed23 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -36,10 +36,20 @@ At present, one potential drawback for direct-upload is that files are only part ``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` +.. _cors-s3-bucket: -**IMPORTANT:** One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers is to allow cross site (CORS) requests on your S3 store. +Allow CORS for S3 Buckets +~~~~~~~~~~~~~~~~~~~~~~~~~ + +**IMPORTANT:** One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers and direct upload to work with dvwebloader (:ref:`folder-upload`) is to allow cross site (CORS) requests on your S3 store. The example below shows how to enable CORS rules (to support upload and download) on a bucket using the AWS CLI command line tool. Note that you may want to limit the AllowedOrigins and/or AllowedHeaders further. https://github.com/gdcc/dataverse-previewers/wiki/Using-Previewers-with-download-redirects-from-S3 has some additional information about doing this. 
+If you'd like to check the CORS configuration on your bucket before making changes: + +``aws s3api get-bucket-cors --bucket `` + +To proceed with making changes: + ``aws s3api put-bucket-cors --bucket --cors-configuration file://cors.json`` with the contents of the file cors.json as follows: diff --git a/doc/sphinx-guides/source/developers/containers.rst b/doc/sphinx-guides/source/developers/containers.rst index 64c7710f0f5..63eff266a4f 100755 --- a/doc/sphinx-guides/source/developers/containers.rst +++ b/doc/sphinx-guides/source/developers/containers.rst @@ -9,6 +9,8 @@ The Dataverse Community is exploring the use of Docker, Kubernetes, and other co The :doc:`testing` section mentions using Docker for integration tests. +See also the :doc:`/container/index`. + .. contents:: |toctitle| :local: diff --git a/doc/sphinx-guides/source/developers/documentation.rst b/doc/sphinx-guides/source/developers/documentation.rst index b20fd112533..c89ed6e3b75 100755 --- a/doc/sphinx-guides/source/developers/documentation.rst +++ b/doc/sphinx-guides/source/developers/documentation.rst @@ -22,6 +22,8 @@ That's it! Thank you for your contribution! Your pull request will be added manu Please see https://github.com/IQSS/dataverse/pull/5857 for an example of a quick fix that was merged (the "Files changed" tab shows how a typo was fixed). +Preview your documentation changes which will be built automatically as part of your pull request in Github. It will show up as a check entitled: `docs/readthedocs.org:dataverse-guide — Read the Docs build succeeded!`. For example, this PR built to https://dataverse-guide--9249.org.readthedocs.build/en/9249/. + If you would like to read more about the Dataverse Project's use of GitHub, please see the :doc:`version-control` section. For bug fixes and features we request that you create an issue before making a pull request but this is not at all necessary for quick fixes to the documentation. .. 
_admin: https://github.com/IQSS/dataverse/tree/develop/doc/sphinx-guides/source/admin diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index bf525422c84..6f93cf75d51 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -19,6 +19,7 @@ Developer Guide sql-upgrade-scripts testing documentation + security dependencies debugging coding-style diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index 55f5f550dd9..a2575bb5f50 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -95,6 +95,8 @@ At this point you can send around the draft release for any final feedback. Link Make corrections to the draft, if necessary. It will be out of sync with the .md file, but that's ok (`#7988 `_ is tracking this). +.. _run-build-create-war: + Run a Build to Create the War File ---------------------------------- @@ -110,6 +112,15 @@ Click "Save" then "Build Now". The build number will appear in ``/api/info/version`` (along with the commit mentioned above) from a running installation (e.g. ``{"version":"5.10.1","build":"907-b844672``). +Note that the build number comes from a script in an early build step... + +.. code-block:: bash + + COMMIT_SHA1=`echo $GIT_COMMIT | cut -c-7` + echo "build.number=${BUILD_NUMBER}-${COMMIT_SHA1}" > $WORKSPACE/src/main/java/BuildNumber.properties + +... but we can explore alternative methods of specifying the build number, as described in :ref:`auto-custom-build-number`. 
+ Build Installer (dvinstall.zip) ------------------------------- diff --git a/doc/sphinx-guides/source/developers/remote-users.rst b/doc/sphinx-guides/source/developers/remote-users.rst index a5e51aa5e54..21d36d28a75 100755 --- a/doc/sphinx-guides/source/developers/remote-users.rst +++ b/doc/sphinx-guides/source/developers/remote-users.rst @@ -1,6 +1,6 @@ -==================== -Shibboleth and OAuth -==================== +========================== +Shibboleth, OAuth and OIDC +========================== .. contents:: |toctitle| :local: @@ -30,4 +30,36 @@ Now when you go to http://localhost:8080/oauth2/firstLogin.xhtml you should be p ---- +OpenID Connect (OIDC) +--------------------- + +If you are working on the OpenID Connect (OIDC) user authentication flow, you do not need to connect to a remote provider (as explained in :doc:`/installation/oidc`) to test this feature. Instead, you can use the available configuration that allows you to run a test Keycloak OIDC identity management service locally through a Docker container. + +(Please note! The client secret (``ss6gE8mODCDfqesQaSG3gwUwZqZt547E``) is hard-coded in ``oidc-realm.json`` and ``oidc-keycloak-auth-provider.json``. Do not use this config in production! This is only for developers.) + +You can find this configuration in ``conf/keycloak``. There are two options available in this directory to run a Keycloak container: bash script or docker-compose. + +To run the container via bash script, execute the following command (positioned in ``conf/keycloak``): + +``./run-keycloak.sh`` + +The script will create a Keycloak container or restart it if the container was already created and stopped. Once the script is executed, Keycloak should be accessible from http://localhost:8090/ + +Now load the configuration defined in ``oidc-keycloak-auth-provider.json`` into your Dataverse installation to enable Keycloak as an authentication provider. 
+ +``curl -X POST -H 'Content-type: application/json' --upload-file oidc-keycloak-auth-provider.json http://localhost:8080/api/admin/authenticationProviders`` + +You should see the new provider, called "OIDC-Keycloak", under "Other options" on the Log In page. + +You should be able to log into Keycloak with the following credentials: + +- username: kcuser +- password: kcpassword + +In case you want to stop and remove the Keycloak container, just run the other available bash script: + +``./rm-keycloak.sh`` + +---- + Previous: :doc:`unf/index` | Next: :doc:`geospatial` diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst index 3dc73ce6a0c..4d323455d28 100644 --- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst +++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst @@ -122,7 +122,7 @@ To add multiple Uploaded Files to the Dataset --------------------------------------------- Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: +jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: * "description" - A description of the file * "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset @@ -154,7 +154,7 @@ Replacing an existing file in the Dataset ----------------------------------------- Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. 
This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must also include values for: +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must include values for: * "storageIdentifier" - String, as specified in prior calls * "fileName" - String @@ -172,9 +172,107 @@ Note that the API call does not validate that the file matches the hash value su export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export FILE_IDENTIFIER=5072 - export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'forceReplace':'true', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" + export JSON_DATA='{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "forceReplace":"true", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}' curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/files/$FILE_IDENTIFIER/replace" -F "jsonData=$JSON_DATA" Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. 
With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + +Replacing multiple existing files in the Dataset +------------------------------------------------ + +Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must include some additional values: + +* "fileToReplaceId" - the id of the file being replaced +* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false) +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512. + +..
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA='[{"fileToReplaceId": 10, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}},{"fileToReplaceId": 11, "forceReplace": true, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +The JSON object returned as a response from this API call includes a "data" object that indicates how many of the file replacements succeeded and provides per-file error messages for those that did not, e.g. + +..
code-block:: + + { + "status": "OK", + "data": { + "Files": [ + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "errorMessage": "Bad Request:The file to replace does not belong to this dataset.", + "fileDetails": { + "fileToReplaceId": 10, + "description": "My description.", + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "restrict": "false", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "fileName": "file1.Bin", + "mimeType": "application/octet-stream", + "checksum": { + "@type": "SHA-1", + "@value": "123456" + } + } + }, + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "successMessage": "Replaced successfully in the dataset", + "fileDetails": { + "description": "My description.", + "label": "file2.txt", + "restricted": false, + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "dataFile": { + "persistentId": "", + "pidURL": "", + "filename": "file2.txt", + "contentType": "text/plain", + "filesize": 2407, + "description": "My description.", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "rootDataFileId": 11, + "previousDataFileId": 11, + "checksum": { + "type": "SHA-1", + "value": "123789" + } + } + } + } + ], + "Result": { + "Total number of files": 2, + "Number of files successfully replaced": 1 + } + } + } + + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exist in S3/have been uploaded via some out-of-band method. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above.
diff --git a/doc/sphinx-guides/source/developers/security.rst b/doc/sphinx-guides/source/developers/security.rst new file mode 100755 index 00000000000..09b80a4c840 --- /dev/null +++ b/doc/sphinx-guides/source/developers/security.rst @@ -0,0 +1,34 @@ +======== +Security +======== + +This section describes security practices and procedures for the Dataverse team. + +.. contents:: |toctitle| + :local: + +Intake of Security Issues +------------------------- + +As described under :ref:`reporting-security-issues`, we encourage the community to email security@dataverse.org if they have any security concerns. These emails go into our private ticket tracker (RT_). + +.. _RT: https://help.hmdc.harvard.edu + +We use a private GitHub issue tracker at https://github.com/IQSS/dataverse-security/issues for security issues. + +Sending Security Notices +------------------------ + +When drafting the security notice, it might be helpful to look at `previous examples`_. + +.. _previous examples: https://drive.google.com/drive/folders/0B_qMYwdHFZghaDZIU2hWQnBDZVE?resourcekey=0-SYjuhCohAIM7_pmysVc3Xg&usp=sharing + +Gather email addresses from the following sources (these are also described under :ref:`ongoing-security` in the Installation Guide): + +- "contact_email" in the `public installation spreadsheet`_ +- "Other Security Contacts" in the `private installation spreadsheet`_ + +Once you have the emails, include them as bcc. + +.. _public installation spreadsheet: https://docs.google.com/spreadsheets/d/1bfsw7gnHlHerLXuk7YprUT68liHfcaMxs1rFciA-mEo/edit#gid=0 +.. 
_private installation spreadsheet: https://docs.google.com/spreadsheets/d/1EWDwsj6eptQ7nEr-loLvdU7I6Tm2ljAplfNSVWR42i0/edit?usp=sharing diff --git a/doc/sphinx-guides/source/developers/tips.rst b/doc/sphinx-guides/source/developers/tips.rst index 3fff3e76ea8..bf75a05f84e 100755 --- a/doc/sphinx-guides/source/developers/tips.rst +++ b/doc/sphinx-guides/source/developers/tips.rst @@ -58,6 +58,8 @@ From the root of the git repo, run the following command to set the build number This should update or place a file at ``src/main/java/BuildNumber.properties``. +(See also :ref:`auto-custom-build-number` for other ways of changing the build number.) + Then, from Netbeans, click "Run" and then "Clean and Build Project (dataverse)". After this completes successfully, click "Run" and then "Run Project (dataverse)" Confirm the Change Was Deployed @@ -164,6 +166,8 @@ Git on Mac On a Mac, you won't have git installed unless you have "Command Line Developer Tools" installed but running ``git clone`` for the first time will prompt you to install them. +.. _auto-custom-build-number: + Automation of Custom Build Number on Webpage ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -173,6 +177,15 @@ commit id in your test deployment webpages on the bottom right corner next to th When you prefer manual updates, there is another script, see above: :ref:`custom_build_num_script`. +An alternative to that is to use *MicroProfile Config* and set the option ``dataverse.build`` via a system property, +environment variable (``DATAVERSE_BUILD``) or `one of the other config sources +`__. + +You could even override the version itself with the option ``dataverse.version`` in the same way, which is usually +picked up from a build time source. + +See also the discussion of version numbers in :ref:`run-build-create-war`.
+ Sample Data ----------- diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index 148518d2ce5..f6eda53d718 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -6,7 +6,7 @@ Dataverse Documentation v. |version| ==================================== -These documentation guides are for the |version| version of Dataverse. To find guides belonging to previous versions, :ref:`guides_versions` has a list of all available versions. +These documentation guides are for the |version| version of Dataverse. To find guides belonging to previous or future versions, :ref:`guides_versions` has a list of all available versions. .. toctree:: :glob: @@ -18,6 +18,7 @@ These documentation guides are for the |version| version of Dataverse. To find g api/index installation/index developers/index + container/index style/index How the Guides Are Organized @@ -25,11 +26,13 @@ How the Guides Are Organized The guides are documentation that explain how to use Dataverse, which are divided into the following sections: User Guide, -Installation Guide, Developer Guide, API Guide and Style Guide. The User Guide is further divided into primary activities: finding & using +Installation Guide, Developer Guide, API Guide, Style Guide and Container Guide. +The User Guide is further divided into primary activities: finding & using data, adding Datasets, administering dataverses or Datasets, and Dataset exploration/visualizations. Details on all of the above tasks can be found in the Users Guide. The Installation Guide is for people or organizations who want to host their -own Dataverse. The Developer Guide contains instructions for +own Dataverse. The Container Guide gives information on how to deploy Dataverse with containers. +The Developer Guide contains instructions for people who want to contribute to the Open Source Dataverse project or who want to modify the code to suit their own needs. 
Finally, the API Guide is for Developers that work on other applications and are interested in connecting with Dataverse through our APIs. @@ -67,7 +70,7 @@ The support email address is `support@dataverse.org `__ or use `GitHub pull requests `__, if you have some code, scripts or documentation that you'd like to share. -If you have a **security issue** to report, please email `security@dataverse.org `__. +If you have a **security issue** to report, please email `security@dataverse.org `__. See also :ref:`reporting-security-issues`. Indices and Tables diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index c61bf451eb7..ee89b718777 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -101,6 +101,31 @@ Password complexity rules for "builtin" accounts can be adjusted with a variety - :ref:`:PVGoodStrength` - :ref:`:PVCustomPasswordResetAlertMessage` +.. _ongoing-security: + +Ongoing Security of Your Installation ++++++++++++++++++++++++++++++++++++++ + +Like any application, you should keep up-to-date with patches to both the Dataverse software and the platform (usually Linux) it runs on. Dataverse releases are announced on the dataverse-community_ mailing list, the Dataverse blog_, and in chat.dataverse.org_. + +.. _dataverse-community: https://groups.google.com/g/dataverse-community +.. _blog: https://dataverse.org/blog +.. _chat.dataverse.org: https://chat.dataverse.org + +In addition to these public channels, you can subscribe to receive security notices via email from the Dataverse team. These notices are sent to the ``contact_email`` in the installation spreadsheet_ and you can open an issue in the dataverse-installations_ repo to add or change the contact email. Security notices are also sent to people and organizations that prefer to remain anonymous. To be added to this private list, please email support@dataverse.org. + +.. 
_spreadsheet: https://docs.google.com/spreadsheets/d/1bfsw7gnHlHerLXuk7YprUT68liHfcaMxs1rFciA-mEo/edit#gid=0 +.. _dataverse-installations: https://github.com/IQSS/dataverse-installations + +For additional details about security practices by the Dataverse team, see the :doc:`/developers/security` section of the Developer Guide. + +.. _reporting-security-issues: + +Reporting Security Issues ++++++++++++++++++++++++++ + +If you have a security issue to report, please email it to security@dataverse.org. + .. _network-ports: Network Ports @@ -238,6 +263,153 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. +.. _database-persistence: + +Database Persistence +-------------------- + +The Dataverse software uses a PostgreSQL database to store objects users create. +You can configure basic and advanced settings for the PostgreSQL database connection with the help of +MicroProfile Config API. 
+ +Basic Database Settings ++++++++++++++++++++++++ + +1. Any of these settings can be set via system properties (see :ref:`jvm-options` starting at :ref:`dataverse.db.name`), environment variables or other + MicroProfile Config mechanisms supported by the app server. + `See Payara docs for supported sources `_. +2. Remember to protect your secrets. For passwords, use an environment variable (bare minimum), a password alias named the same + as the key (OK) or use the "dir config source" of Payara (best). + + Alias creation example: + + .. code-block:: shell + + echo "AS_ADMIN_ALIASPASSWORD=changeme" > /tmp/p.txt + asadmin create-password-alias --passwordfile /tmp/p.txt dataverse.db.password + rm /tmp/p.txt + +3. Environment variables follow the key, replacing any dot, colon, dash, etc. with an underscore "_" and converting all + letters to uppercase. Example: ``dataverse.db.host`` -> ``DATAVERSE_DB_HOST`` + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.host + - The PostgreSQL server to connect to. + - ``localhost`` + * - dataverse.db.port + - The PostgreSQL server port to connect to. + - ``5432`` + * - dataverse.db.user + - The PostgreSQL user name to connect with. + - | ``dataverse`` + | (installer sets to ``dvnapp``) + * - dataverse.db.password + - The PostgreSQL user's password to connect with. + + **Please note the safety advisory above.** + - *No default* + * - dataverse.db.name + - The PostgreSQL database name to use for the Dataverse installation. + - | ``dataverse`` + | (installer sets to ``dvndb``) + * - dataverse.db.parameters + - Connection parameters, such as ``sslmode=require``. See `Postgres JDBC docs `_. + Note: you don't need to provide the initial "?". + - *Empty string* + +Advanced Database Settings +++++++++++++++++++++++++++ + +The following options are useful in many scenarios.
You might be interested in debug output during development or +monitoring performance in production. + +You can find more details within the Payara docs: + +- `User Guide: Connection Pool Configuration `_ +- `Tech Doc: Advanced Connection Pool Configuration `_. + +Connection Validation +^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.is-connection-validation-required + - ``true``: Validate connections, allow server to reconnect in case of failure. + - ``false`` + * - dataverse.db.connection-validation-method + - | The method of connection validation: + | ``table|autocommit|meta-data|custom-validation``. + - *Empty string* + * - dataverse.db.validation-table-name + - The name of the table used for validation if the validation method is set to ``table``. + - *Empty string* + * - dataverse.db.validation-classname + - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation``. + - *Empty string* + * - dataverse.db.validate-atmost-once-period-in-seconds + - Specifies the time interval in seconds between successive requests to validate a connection at most once. + - ``0`` (disabled) + +Connection & Statement Leaks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.connection-leak-timeout-in-seconds + - Specify timeout when connections count as "leaked". + - ``0`` (disabled) + * - dataverse.db.connection-leak-reclaim + - If enabled, leaked connection will be reclaimed by the pool after connection leak timeout occurs. + - ``false`` + * - dataverse.db.statement-leak-timeout-in-seconds + - Specify timeout when statements should be considered to be "leaked".
+ - ``0`` (disabled) + * - dataverse.db.statement-leak-reclaim + - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs. + - ``false`` + +Logging & Slow Performance +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.statement-timeout-in-seconds + - Timeout property of a connection to enable termination of abnormally long running queries. + - ``-1`` (disabled) + * - dataverse.db.slow-query-threshold-in-seconds + - SQL queries that exceed this time in seconds will be logged. + - ``-1`` (disabled) + * - dataverse.db.log-jdbc-calls + - When set to true, all JDBC calls will be logged allowing tracing of all JDBC interactions including SQL. + - ``false`` + + + .. _file-storage: File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores @@ -263,7 +435,9 @@ To support multiple stores, a Dataverse installation now requires an id, type, a Out of the box, a Dataverse installation is configured to use local file storage in the 'file' store by default. You can add additional stores and, as a superuser, configure specific Dataverse collections to use them (by editing the 'General Information' for the Dataverse collection as described in the :doc:`/admin/dataverses-datasets` section). -Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored (in the /temp subdir of that directory), independent of the location of any 'file' store defined above. +Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored +(in the /temp subdir of that directory), independent of the location of any 'file' store defined above. 
+(See also the option reference: :ref:`dataverse.files.directory`) If you wish to change which store is used by default, you'll need to delete the existing default storage driver and set a new one using jvm options. @@ -274,6 +448,8 @@ If you wish to change which store is used by default, you'll need to delete the It is also possible to set maximum file upload size limits per store. See the :ref:`:MaxFileUploadSizeInBytes` setting below. +.. _storage-files-dir: + File Storage ++++++++++++ @@ -580,8 +756,7 @@ Optionally, you may provide static credentials for each S3 storage using MicroPr - ``dataverse.files..access-key`` for this storage's "access key ID" - ``dataverse.files..secret-key`` for this storage's "secret access key" -You may provide the values for these via any of the -`supported config sources `_. +You may provide the values for these via any `supported MicroProfile Config API source`_. **WARNING:** @@ -700,6 +875,26 @@ Once you have configured a trusted remote store, you can point your users to the =========================================== ================== ========================================================================== =================== +.. _temporary-file-storage: + +Temporary Upload File Storage ++++++++++++++++++++++++++++++ + +When uploading files via the API or Web UI, you need to be aware that multiple steps are involved to enable +features like ingest processing, transfer to a permanent storage, checking for duplicates, unzipping etc. + +All of these processes are triggered after finishing transfers over the wire and moving the data into a temporary +(configurable) location on disk at :ref:`${dataverse.files.directory} `\ ``/temp``. + +Before being moved there, + +- JSF Web UI uploads are stored at :ref:`${dataverse.files.uploads} `, defaulting to + ``/usr/local/payara5/glassfish/domains/domain1/uploads`` folder in a standard installation. 
This place is + configurable and might be set to a separate disk volume where stale uploads are purged periodically. +- API uploads are stored at the system's temporary files location indicated by the Java system property + ``java.io.tmpdir``, defaulting to ``/tmp`` on Linux. If this location is backed by a `tmpfs `_ + on your machine, large file uploads via API will cause RAM and/or swap usage bursts. You might want to point this to + a different location, restrict its maximum size, and monitor for stale uploads. .. _Branding Your Installation: @@ -1044,6 +1239,14 @@ On a new Dataverse installation, users may select from the following licenses or (Note that existing Dataverse installations which are upgraded from 5.9 or previous will only offer CC0 1.0, added automatically during the upgrade to version 5.10.) +If the Dataverse Installation supports multiple languages, the license name/description translations should be added to the ``License`` properties files. (See :ref:`i18n` for more on properties files and internationalization in general.) +To create the key, convert the license name to lowercase and replace spaces with underscores. + +Example:: + + license.cc0_1.0.description=Creative Commons CC0 1.0 Universal Public Domain Dedication. + license.cc0_1.0.name=CC0 1.0 + You have a lot of control over which licenses and terms are available. You can remove licenses and add new ones. You can decide which license is the default. You can remove "Custom Dataset Terms" as a option. You can remove all licenses and make "Custom Dataset Terms" the only option. Before making changes, you are encouraged to read the :ref:`license-terms` section of the User Guide about why CC0 is the default and what the "Custom Dataset Terms" option allows. @@ -1092,6 +1295,29 @@ Disabling Custom Dataset Terms See :ref:`:AllowCustomTermsOfUse` for how to disable the "Custom Dataset Terms" option. +..
_ChangeLicenseSortOrder: + +Sorting licenses +---------------- + +The default order of licenses in the dropdown in the user interface is as follows: + +* The default license is shown first +* Followed by the remaining installed licenses in the order of installation +* The custom license is at the end + +Only the order of the installed licenses can be changed with the API calls. The default license always remains first and the custom license last. + +The order of licenses can be changed by setting the ``sortOrder`` property of a license. For the purpose of making sorting easier and to allow grouping of the licenses, the ``sortOrder`` property does not have to be unique. Licenses with the same ``sortOrder`` are sorted by their ID, i.e., first by the sortOrder, then by the ID. Nevertheless, you can set a unique ``sortOrder`` for every license in order to sort them fully manually. + +The ``sortOrder`` is a whole number and is used to sort licenses in ascending order. + +Changing the sorting order of a license specified by the license ``$ID`` is done by superusers using the following API call: + +.. code-block:: bash + + export SORT_ORDER=100 + curl -X PUT -H 'Content-Type: application/json' -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/$ID/:sortOrder/$SORT_ORDER .. _BagIt File Handler: BagIt File Handler @@ -1385,43 +1611,105 @@ It's also possible to change these values by stopping Payara, editing ``payara5/ dataverse.fqdn ++++++++++++++ -If the Dataverse installation has multiple DNS names, this option specifies the one to be used as the "official" hostname. For example, you may want to have ``dataverse.example.edu``, and not the less appealing ``server-123.example.edu`` to appear exclusively in all the registered global identifiers, etc. +If the Dataverse installation has multiple DNS names, this option specifies the one to be used as the "official" +hostname.
For example, you may want to have ``dataverse.example.edu``, and not the less appealing +``server-123.example.edu`` to appear exclusively in all the registered global identifiers, etc. -The password reset feature requires ``dataverse.fqdn`` to be configured. +- Email confirmation links +- Password reset links +- Generating a Private URL +- PID minting +- Exporting to Schema.org format (and showing JSON-LD in HTML's tag) +- Exporting to DDI format +- Which Dataverse installation an "external tool" should return to +- URLs embedded in SWORD API responses +- ... -Configuring ``dataverse.fqdn`` is not enough. Read on for the importance of also setting ``dataverse.siteUrl``. +Usually it will follow the pattern ``https:///``. +*Only* the FQDN part of your Dataverse installation URL can be determined by setting ``dataverse.fqdn``. + +**Notes:** + +- The URL will default to using ``https://`` and no additional port information. If that does not suit your setup, you + can define an additional option, ``dataverse.siteUrl``, :ref:`explained below `, which always + takes precedence. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FQDN``. + Defaults to ``localhost`` when used with ``mp.config.profile=ct`` .. _dataverse.siteUrl: dataverse.siteUrl +++++++++++++++++ -``dataverse.siteUrl`` is used to configure the URL for your Dataverse installation that you plan to advertise to your users. As explained in the :ref:`installation ` docs, this setting is critical for the correct operation of your installation. - -For example, your site URL could be https://dataverse.example.edu +``dataverse.siteUrl`` is used to configure the URL for your Dataverse installation that you plan to advertise to your +users. As explained in the :ref:`installation ` docs, this setting is critical for the correct +operation of your installation. For example, your site URL could be https://dataverse.example.edu . 
That is, even though +the server might also be available at uglier URLs such as https://server-123.example.edu, the site URL is the +"official" URL. -That is, even though the server might also be available at uglier URLs such as https://server-123.example.edu the site URL is the "official" URL. +That said, some environments may require using a different URL pattern to access your installation. You might need to +use HTTP without "S", a non-standard port and so on. This is especially useful in development or testing environments. -The ``dataverse.siteUrl`` JVM option can be configured by following the procedure under :ref:`jvm-options` or by editing ``domain.xml`` directly. You can specify the protocol, host, and port number. Your ``domain.xml`` file could look like this, for example: +You can provide any custom tailored site URL via ``dataverse.siteUrl``, which always takes precedence. +Example: ``dataverse.siteUrl=http://localhost:8080`` -``-Ddataverse.siteUrl=https://dataverse.example.edu`` +If you wish to change your site URL by changing the domain configuration, you should edit your ``domain.xml`` directly +to avoid problems with colons in commands. Find a line similar to +``-Ddataverse.siteUrl=https://dataverse.example.edu`` and change it. You can specify the +protocol, host, and port number and should not include a trailing slash. -Note that it's also possible to use the ``dataverse.fqdn`` as a variable, if you wish. Here's an example of this as well as a custom port (which is usually not necessary): +**Notes:** -``-Ddataverse.siteUrl=https://${dataverse.fqdn}:444`` +- This setting may be used in combination with variable replacement, referencing :ref:`dataverse.fqdn` with + ``./asadmin create-jvm-options "\-Ddataverse.siteUrl=http\://\${dataverse.fqdn}\:8080"`` +- Remember to restart Payara after editing ``domain.xml``. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SITEURL``. 
+ Defaults to ``http://${dataverse.fqdn}:8080`` when used with ``mp.config.profile=ct`` +- We are absolutely aware that it's confusing to have both ``dataverse.fqdn`` and ``dataverse.siteUrl``. + https://github.com/IQSS/dataverse/issues/6636 is about resolving this confusion. -We are absolutely aware that it's confusing to have both ``dataverse.fqdn`` and ``dataverse.siteUrl``. https://github.com/IQSS/dataverse/issues/6636 is about resolving this confusion. +.. _dataverse.files.directory: dataverse.files.directory +++++++++++++++++++++++++ -This is how you configure the path Dataverse uses for temporary files. (File store specific dataverse.files.\.directory options set the permanent data storage locations.) +Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number +of purposes: + +1. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before + shipping to the final storage destination. +2. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer + to final storage location and/or ingest. +3. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. + This location is deprecated and might be refactored into a distinct setting in the future. +4. The experimental DCM feature for :doc:`../developers/big-data-support` is able to trigger imports for externally + uploaded files in a directory tree at ``//`` + under certain conditions. This directory may also be used by file stores for :ref:`permanent file storage `, + but this is controlled by other, store-specific settings. + +Defaults to ``/tmp/dataverse``. Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_DIRECTORY``. + +.. 
_dataverse.files.uploads: + +dataverse.files.uploads ++++++++++++++++++++++++ + +Configure a folder to store the incoming file stream during uploads (before transferring to ``${dataverse.files.directory}/temp``). +Please also see :ref:`temporary-file-storage` for more details. +You can use an absolute path or a relative one, which is relative to the application server domain directory. + +Defaults to ``./uploads``, which resolves to ``/usr/local/payara5/glassfish/domains/domain1/uploads`` in a default +installation. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_UPLOADS``. dataverse.auth.password-reset-timeout-in-minutes ++++++++++++++++++++++++++++++++++++++++++++++++ Users have 60 minutes to change their passwords by default. You can adjust this value here. +.. _dataverse.db.name: + dataverse.db.name +++++++++++++++++ @@ -1431,6 +1719,8 @@ Defaults to ``dataverse`` (but the installer sets it to ``dvndb``). Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_NAME``. +See also :ref:`database-persistence`. + dataverse.db.user +++++++++++++++++ @@ -1473,30 +1763,118 @@ Defaults to ``5432``, the default PostgreSQL port. Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_PORT``. +.. _dataverse.solr.host: + +dataverse.solr.host ++++++++++++++++++++ + +The hostname of a Solr server to connect to. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``localhost``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_HOST``. +Defaults to ``solr``, when used with ``mp.config.profile=ct`` (:ref:`see below <:ApplicationServerSettings>`). + +dataverse.solr.port ++++++++++++++++++++ + +The Solr server port to connect to. 
Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``8983``, the default Solr port. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PORT``. + +dataverse.solr.core ++++++++++++++++++++ + +The name of the Solr core to use for this Dataverse installation. Might be used to switch to a different core quickly. +Remember to restart / redeploy Dataverse after changing the setting (as with :ref:`:SolrHostColonPort`). + +Defaults to ``collection1``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_CORE``. + +dataverse.solr.protocol ++++++++++++++++++++++++ + +The Solr server URL protocol for the connection. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``http``, but might be set to ``https`` for extra secure Solr installations. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PROTOCOL``. + +dataverse.solr.path ++++++++++++++++++++ + +The path part of the Solr endpoint URL (e.g. ``/solr/collection1`` of ``http://localhost:8983/solr/collection1``). +Might be used to target a Solr API at non-default places. Remember to restart / redeploy Dataverse after changing the +setting (as with :ref:`:SolrHostColonPort`). + +Defaults to ``/solr/${dataverse.solr.core}``, interpolating the core name when used. Make sure to include the variable +when using it to configure your core name! + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PATH``. + dataverse.rserve.host +++++++++++++++++++++ -Host name for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). 
+Host name for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``localhost``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_HOST``. dataverse.rserve.port +++++++++++++++++++++ -Port number for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Port number for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``6311`` when not configured or not a valid integer. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_PORT``. dataverse.rserve.user +++++++++++++++++++++ -Username for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Username for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``rserve``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_USER``. dataverse.rserve.password +++++++++++++++++++++++++ -Password for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Password for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``rserve``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_PASSWORD``. dataverse.rserve.tempdir ++++++++++++++++++++++++ -Temporary directory used by Rserve (defaults to /tmp/Rserv). Note that this location is local to the host on which Rserv is running (specified in ``dataverse.rserve.host`` above). 
When talking to Rserve, Dataverse needs to know this location in order to generate absolute path names of the files on the other end. +Temporary directory used by Rserve (defaults to ``/tmp/Rserv``). Note that this +location is local to the host on which Rserv is running (specified in +``dataverse.rserve.host`` above). When talking to Rserve, Dataverse needs to +know this location in order to generate absolute path names of the files on the +other end. + +Defaults to ``/tmp/Rserv``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_TEMPDIR``. .. _dataverse.dropbox.key: @@ -1638,8 +2016,6 @@ By default, download URLs to files will be included in Schema.org JSON-LD output ``./asadmin create-jvm-options '-Ddataverse.files.hide-schema-dot-org-download-urls=true'`` -Please note that there are other reasons why download URLs may not be included for certain files such as if a guestbook entry is required or if the file is restricted. - For more on Schema.org JSON-LD, see the :doc:`/admin/metadataexport` section of the Admin Guide. .. _useripaddresssourceheader: @@ -1669,6 +2045,60 @@ This setting is useful in cases such as running your Dataverse installation behi "HTTP_FORWARDED", "HTTP_VIA", "REMOTE_ADDR" + +.. _dataverse.personOrOrg.assumeCommaInPersonName: + +dataverse.personOrOrg.assumeCommaInPersonName ++++++++++++++++++++++++++++++++++++++++++++++ + +Please note that this setting is experimental. + +The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. If you are sure that +users are following the guidance to add people in the recommended family name, given name order, with a comma, you can set this true to always assume entries without a comma are for Organizations. The default is false. + +.. 
_dataverse.personOrOrg.orgPhraseArray: + +dataverse.personOrOrg.orgPhraseArray +++++++++++++++++++++++++++++++++++++ + +Please note that this setting is experimental. + +The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. +If you have examples where an organization name is being inferred to belong to a person, you can use this setting to force it to be recognized as an organization. +The value is expected to be a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization. + + +.. _dataverse.api.signature-secret: + +dataverse.api.signature-secret +++++++++++++++++++++++++++++++ + +Context: Dataverse has the ability to create "Signed URLs" for its API calls. Using signed URLs is more secure than +providing API tokens, which are long-lived and give the holder all of the permissions of the user. In contrast, signed URLs +are time limited and only allow the action of the API call in the URL. See :ref:`api-exttools-auth` and +:ref:`api-native-signed-url` for more details. + +The key used to sign a URL is created from the API token of the creating user plus a signature-secret provided by an administrator. +**Using a signature-secret is highly recommended.** This setting defaults to an empty string. Using a non-empty +signature-secret makes it impossible for someone who knows an API token to forge signed URLs and provides extra security by +making the overall signing key longer. + +Since the signature-secret is sensitive, you should treat it like a password. Here is an example of how to set your shared secret +with the secure method "password alias": + +.. 
code-block:: shell + + echo "AS_ADMIN_ALIASPASSWORD=change-me-super-secret" > /tmp/password.txt + asadmin create-password-alias --passwordfile /tmp/password.txt dataverse.api.signature-secret + rm /tmp/password.txt + +Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable +``DATAVERSE_API_SIGNATURE_SECRET``. + +**WARNING:** For security, do not use the sources "environment variable" or "system property" (JVM option) in a +production context! Rely on password alias, secrets directory or cloud based sources instead! + + .. _:ApplicationServerSettings: @@ -1684,6 +2114,21 @@ To facilitate large file upload and download, the Dataverse Software installer b and restart Payara to apply your change. +mp.config.profile ++++++++++++++++++ + +MicroProfile Config 2.0 defines the `concept of "profiles" `_. +They can be used to change configuration values by context. This is used in Dataverse to change some configuration +defaults when used inside container context rather than classic installations. + +As per the spec, you will need to set the configuration value ``mp.config.profile`` to ``ct`` as early as possible. +This is best done with a system property: + +``./asadmin create-system-properties 'mp.config.profile=ct'`` + +You might also create your own profiles and use these, please refer to the upstream documentation linked above. + + .. _database-settings: Database Settings @@ -2171,6 +2616,8 @@ Limit the number of files in a zip that your Dataverse installation will accept. ``curl -X PUT -d 2048 http://localhost:8080/api/admin/settings/:ZipUploadFilesLimit`` +.. 
_:SolrHostColonPort: + :SolrHostColonPort ++++++++++++++++++ @@ -2178,6 +2625,8 @@ By default your Dataverse installation will attempt to connect to Solr on port 8 ``curl -X PUT -d localhost:8983 http://localhost:8080/api/admin/settings/:SolrHostColonPort`` +**Note:** instead of using a database setting, you could alternatively use JVM settings like :ref:`dataverse.solr.host`. 
+ :SolrFullTextIndexing +++++++++++++++++++++ @@ -2567,6 +3016,7 @@ The URL for your Repository Storage Abstraction Layer (RSAL) installation. This This setting controls which upload methods are available to users of your Dataverse installation. The following upload methods are available: - ``native/http``: Corresponds to "Upload with HTTP via your browser" and APIs that use HTTP (SWORD and native). +- ``dvwebloader``: Corresponds to :ref:`folder-upload`. Note that ``dataverse.files..upload-redirect`` must be set to "true" on an S3 store for this method to show up in the UI. In addition, :ref:`:WebloaderUrl` must be set. CORS must also be allowed on the S3 bucket. See :ref:`cors-s3-bucket`. - ``dcm/rsync+ssh``: Corresponds to "Upload with rsync+ssh via Data Capture Module (DCM)". A lot of setup is required, as explained in the :doc:`/developers/big-data-support` section of the Developer Guide. Out of the box only ``native/http`` is enabled and will work without further configuration. To add multiple upload method, separate them using a comma like this: @@ -2972,7 +3422,7 @@ For example: ``curl -X PUT -d "This content needs to go through an additional review by the Curation Team before it can be published." http://localhost:8080/api/admin/settings/:DatasetMetadataValidationFailureMsg`` - + :ExternalValidationAdminOverride ++++++++++++++++++++++++++++++++ @@ -3067,3 +3517,12 @@ The interval in seconds between Dataverse calls to Globus to check on upload pro +++++++++++++++++++++++++ A true/false option to add a Globus transfer option to the file download menu which is not yet fully supported in the dataverse-globus app. See :ref:`globus-support` for details. + +.. _:WebloaderUrl: + +:WebloaderUrl ++++++++++++++ + +The URL for the main HTML file in https://github.com/gdcc/dvwebloader when that app is deployed. See also :ref:`:UploadMethods` for another required setting. + +.. 
_supported MicroProfile Config API source: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index b05459b6aaf..7d60054ae17 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -26,8 +26,8 @@ Detailed below are what metadata schemas we support for Citation and Domain Spec - `Geospatial Metadata `__ (`see .tsv version `__): compliant with DDI Lite, DDI 2.5 Codebook, DataCite, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. - `Social Science & Humanities Metadata `__ (`see .tsv version `__): compliant with DDI Lite, DDI 2.5 Codebook, and Dublin Core. - `Astronomy and Astrophysics Metadata `__ (`see .tsv version `__): These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) - `VOResource Schema format `__ and is based on - `Virtual Observatory (VO) Discovery and Provenance Metadata `__. + `VOResource Schema format `__ and is based on + `Virtual Observatory (VO) Discovery and Provenance Metadata `__ (`see .tsv version `__). - `Life Sciences Metadata `__ (`see .tsv version `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. - `Journal Metadata `__ (`see .tsv version `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. @@ -36,8 +36,12 @@ Experimental Metadata Unlike supported metadata, experimental metadata is not enabled by default in a new Dataverse installation. Feedback via any `channel `_ is welcome! 
+- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) - `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. +Please note: these custom metadata schemas are not included in the Solr schema for indexing by default, you will need +to add them as necessary for your custom metadata blocks. See "Update the Solr Schema" in :doc:`../admin/metadatacustomization`. + See Also ~~~~~~~~ diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index ec3bb392ce5..31dd7f9cf78 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -93,6 +93,13 @@ Dropbox Upload Some Dataverse installations support the ability to upload files directly from Dropbox. To do so, click the "Upload from Dropbox" button, log in to Dropbox in the pop-up window, and select the files you'd like to transfer over. +.. _folder-upload: + +Folder Upload +------------- + +Some Dataverse installations support the ability to upload files from a local folder and subfolders. To do this, click the "Upload from Folder" button, select the folder you wish to upload, select/unselect specific files, and click "Start Uploads". More detailed instructions are available in the `DVWebloader wiki `_. + .. _rsync_upload: rsync + SSH Upload @@ -177,11 +184,32 @@ File Handling Certain file types in the Dataverse installation are supported by additional functionality, which can include downloading in different formats, previews, file-level metadata preservation, file-level data citation; and exploration through data visualization and analysis. See the sections below for information about special functionality for specific file types. +.. 
_file-previews: + File Previews ------------- Dataverse installations can add previewers for common file types uploaded by their research communities. The previews appear on the file page. If a preview tool for a specific file type is available, the preview will be created and will display automatically, after terms have been agreed to or a guestbook entry has been made, if necessary. File previews are not available for restricted files unless they are being accessed using a Private URL. See also :ref:`privateurl`. +Previewers are available for the following file types: + +- Text +- PDF +- Tabular (CSV, Excel, etc., see :doc:`tabulardataingest/index`) +- Code (R, etc.) +- Images (PNG, GIF, JPG) +- Audio (MP3, MPEG, WAV, OGG, M4A) +- Video (MP4, OGG, Quicktime) +- Zip (preview and extract/download) +- HTML +- GeoJSON +- NetCDF/HDF5 (NcML format) +- Hypothes.is + +Additional file types will be added to the `dataverse-previewers `_ repo before they are listed above so please check there for the latest information or to request (or contribute!) an additional file previewer. + +Installation of previewers is explained in the :doc:`/admin/external-tools` section of the Admin Guide. + Tabular Data Files ------------------ @@ -268,7 +296,7 @@ After you :ref:`upload your files `, you can apply a "Workf |cw-image4| How to Describe Your Computational Workflow -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The Dataverse installation you are using may have enabled Computational Workflow metadata fields for your use. If so, when :ref:`editing your dataset metadata `, you will see the fields described below. @@ -299,6 +327,22 @@ Astronomy (FITS) Metadata found in the header section of `Flexible Image Transport System (FITS) files `_ are automatically extracted by the Dataverse Software, aggregated and displayed in the Astronomy Domain-Specific Metadata of the Dataset that the file belongs to. 
This FITS file metadata, is therefore searchable and browsable (facets) at the Dataset-level. +.. _geojson: + +GeoJSON +------- + +A map will be shown as a preview of GeoJSON files when the previewer has been enabled (see :ref:`file-previews`). See also a `video demo `_ of the GeoJSON previewer by its author, Kaitlin Newson. + +.. _netcdf-and-hdf5: + +NetCDF and HDF5 +--------------- + +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML_ (XML) format and save it as an auxiliary file. (See also :doc:`/developers/aux-file-support` in the Developer Guide.) A previewer for these NcML files is available (see :ref:`file-previews`). + +.. _NcML: https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_overview.html + Compressed Files ---------------- diff --git a/doc/sphinx-guides/source/user/find-use-data.rst b/doc/sphinx-guides/source/user/find-use-data.rst index 42e1a2b23d4..2e82a1482b4 100755 --- a/doc/sphinx-guides/source/user/find-use-data.rst +++ b/doc/sphinx-guides/source/user/find-use-data.rst @@ -39,6 +39,13 @@ enter search terms for Dataverse collections, dataset metadata (citation and dom metadata. If you are searching for tabular data files you can also search at the variable level for name and label. To find out more about what each field searches, hover over the field name for a detailed description of the field. +.. _geospatial-search: + +Geospatial Search +----------------- + +Geospatial search is available from the :doc:`/api/search` (look for "geo" parameters). The metadata fields that are geospatially indexed are "West Longitude", "East Longitude", "North Latitude", and "South Latitude" from the "Geographic Bounding Box" field in the "Geospatial Metadata" block. 
+ Browsing a Dataverse Installation --------------------------------- diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index e0a344de9a1..4badeabef40 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -4,9 +4,11 @@ Dataverse Software Documentation Versions ========================================= -This list provides a way to refer to the documentation for previous versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. +This list provides a way to refer to the documentation for previous and future versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. -- 5.12.1 +- `develop Git branch `__ +- 5.13 +- `5.12.1 `__ - `5.12 `__ - `5.11.1 `__ - `5.11 `__ diff --git a/modules/container-base/.gitignore b/modules/container-base/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-base/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-base/README.md b/modules/container-base/README.md new file mode 100644 index 00000000000..15011d5c6f4 --- /dev/null +++ b/modules/container-base/README.md @@ -0,0 +1,61 @@ +# Dataverse Base Container Image + +The Dataverse Base Container Image contains primarily a pre-installed and pre-tuned application server with the +necessary software dependencies for deploying and launching a Dataverse repository installation. + +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks, etc., is all done +at this layer. Application images building from this very base focus on adding deployable Dataverse code and +actual scripts. + +*Note:* Currently, there is no application image. 
Please watch https://github.com/IQSS/dataverse/issues/8934 + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +The [Dataverse Container Guide - Base Image](https://guides.dataverse.org/en/latest/container/base-image.html) +provides in-depth information about content, building, tuning and so on for this image. + +**Where to get help and ask questions:** + +IQSS will not offer support on how to deploy or run it. Please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-base) +happens there (again, by the community). Community-supported image tags are based on the two most important branches: + +- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-base/src/main/docker/Dockerfile)) +- The `stable` tag corresponds to the `master` branch, where releases are cut from. + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-base/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image files at `/modules/container-base`. +This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. 
+ +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml new file mode 100644 index 00000000000..bbee6ad67d5 --- /dev/null +++ b/modules/container-base/pom.xml @@ -0,0 +1,176 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-base + ${packaging.type} + Container Base Image + This module provides an application server base image to be decorated with the Dataverse app. 
+ + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + + + + + pom + + + + + ct + + docker-build + gdcc/base:${base.image.tag} + unstable + eclipse-temurin:${target.java.version}-jre + 1000 + 1000 + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + unpack + initialize + + unpack + + + + + fish.payara.distributions + payara + ${payara.version} + zip + false + ${project.build.directory} + + + ^payara\d + payara + + + + + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + ${base.image} + + + + linux/arm64 + linux/amd64 + + ${project.build.directory}/buildx-state + + Dockerfile + + ${java.image} + ${base.image.uid} + ${base.image.gid} + + @ + + assembly.xml + + + + + + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + + + + + maven-install-plugin + + + default-install + install + + install + + + + + + + + + \ No newline at end of file diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile new file mode 100644 index 00000000000..07968e92359 --- /dev/null +++ b/modules/container-base/src/main/docker/Dockerfile @@ -0,0 +1,231 @@ +# Copyright 2022 Forschungszentrum Jülich GmbH +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +################################################################################################################ +# +# THIS FILE IS TO BE USED WITH MAVEN DOCKER BUILD: +# mvn -Pct clean package docker:build +# +################################################################################################################ +# +# Some commands used are inspired by https://github.com/payara/Payara/tree/master/appserver/extras/docker-images. +# Most parts originate from older versions of https://github.com/gdcc/dataverse-kubernetes. +# +# We are not using upstream Payara images because: +# - Using same base image as Solr (https://hub.docker.com/_/solr) is reducing pulls +# - Their image is less optimised for production usage and Dataverse by design choices +# - We provide multi-arch images +# - We provide some tweaks for development and monitoring +# + +# Make the Java base image and version configurable (useful for trying newer Java versions and flavors) +ARG JAVA_IMAGE="eclipse-temurin:11-jre" +FROM $JAVA_IMAGE + +# Default payara ports to expose +# 4848: admin console +# 9009: debug port (JDWP) +# 8080: http +# 8181: https - but http-listener-2 is disabled here! 
+# 8686: JMX +EXPOSE 4848 9009 8080 8686 + +ENV HOME_DIR="/opt/payara" +ENV PAYARA_DIR="${HOME_DIR}/appserver" \ + SCRIPT_DIR="${HOME_DIR}/scripts" \ + CONFIG_DIR="${HOME_DIR}/config" \ + DEPLOY_DIR="${HOME_DIR}/deployments" \ + STORAGE_DIR="/dv" \ + SECRETS_DIR="/secrets" \ + DUMPS_DIR="/dumps" \ + PASSWORD_FILE="${HOME_DIR}/passwordFile" \ + ADMIN_USER="admin" \ + ADMIN_PASSWORD="admin" \ + DOMAIN_NAME="domain1" \ + PAYARA_ARGS="" +ENV PATH="${PATH}:${PAYARA_DIR}/bin:${SCRIPT_DIR}" \ + DOMAIN_DIR="${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" \ + DEPLOY_PROPS="" \ + PREBOOT_COMMANDS="${CONFIG_DIR}/pre-boot-commands.asadmin" \ + POSTBOOT_COMMANDS="${CONFIG_DIR}/post-boot-commands.asadmin" \ + JVM_ARGS="" \ + MEM_MAX_RAM_PERCENTAGE="70.0" \ + MEM_XSS="512k" \ + # Source: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations + MEM_MIN_HEAP_FREE_RATIO="20" \ + MEM_MAX_HEAP_FREE_RATIO="40" \ + MEM_MAX_GC_PAUSE_MILLIS="500" \ + MEM_METASPACE_SIZE="256m" \ + MEM_MAX_METASPACE_SIZE="2g" \ + # Make heap dumps on OOM appear in DUMPS_DIR + ENABLE_DUMPS=0 \ + JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" \ + ENABLE_JMX=0 \ + ENABLE_JDWP=0 \ + ENABLE_RELOAD=0 + +### PART 1: SYSTEM ### +ARG UID=1000 +ARG GID=1000 +USER root +WORKDIR / +SHELL ["/bin/bash", "-euo", "pipefail", "-c"] +# Mark these directories as mutable data containers to avoid cluttering the image's overlayfs at runtime. 
+VOLUME ${STORAGE_DIR} ${SECRETS_DIR} ${DUMPS_DIR} +RUN <> /tmp/password-change-file.txt + echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} + asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} + # Start domain for configuration + ${ASADMIN} start-domain ${DOMAIN_NAME} + # Allow access to admin with password only + ${ASADMIN} enable-secure-admin + + ### CONTAINER USAGE ENABLEMENT + # List & delete memory settings from domain + for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); + do + ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); + done + # Tweak memory settings for containers + ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" + ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" + ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" + ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" + # Set logging to console only for containers + ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false \ + + ### PRODUCTION READINESS + ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' + ${ASADMIN} create-jvm-options '-XX\:+UseStringDeduplication' + ${ASADMIN} create-jvm-options '-XX\:+DisableExplicitGC' + ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' + ${ASADMIN} create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' + # Disable autodeploy and hot reload + ${ASADMIN} set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled="false" + 
${ASADMIN} set configs.config.server-config.admin-service.das-config.autodeploy-enabled="false" + # Enlarge thread pools + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" + ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" + # Enable file caching + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" + # Disable the HTTPS listener (we are always fronting our appservers with a reverse proxy handling SSL) + ${ASADMIN} set configs.config.server-config.network-config.network-listeners.network-listener.http-listener-2.enabled="false" + # Enlarge and tune EJB pools (cannot do this for server-config as set does not create new entries) + ${ASADMIN} set default-config.ejb-container.pool-resize-quantity="2" + ${ASADMIN} set default-config.ejb-container.max-pool-size="128" + ${ASADMIN} set default-config.ejb-container.steady-pool-size="10" + # Misc settings + ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" + ${ASADMIN} create-system-properties jersey.config.client.readTimeout="300000" + ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" \ + + ### DATAVERSE APPLICATION SPECIFICS + # Configure the MicroProfile directory config source to point to /secrets + ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" + # Make request timeouts configurable via MPCONFIG (default to 900 secs = 15 min) + ${ASADMIN} set 
'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' + # TODO: what of the below 3 items can be deleted for container usage? + ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true + ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl + # Always disable phoning home... + ${ASADMIN} disable-phone-home \ + + ### CLEANUP + # Stop domain + ${ASADMIN} stop-domain "${DOMAIN_NAME}" + # Disable JSP servlet dynamic reloads + sed -i 's#org.apache.jasper.servlet.JspServlet#org.apache.jasper.servlet.JspServlet\n \n development\n false\n \n \n genStrAsCharArray\n true\n #' "${DOMAIN_DIR}/config/default-web.xml" + # Cleanup old CA certificates to avoid unnecessary log clutter during startup + ${SCRIPT_DIR}/removeExpiredCaCerts.sh + # Delete generated files + rm -rf \ + "/tmp/password-change-file.txt" \ + "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/osgi-cache" \ + "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs" +EOF + +# Set the entrypoint to dumb-init (as a process supervisor) +ENTRYPOINT ["/usr/bin/dumb-init", "--"] +# This works because we add ${SCRIPT_DIR} to $PATH above! 
+CMD ["entrypoint.sh"] + +LABEL org.opencontainers.image.created="@git.build.time@" \ + org.opencontainers.image.authors="Research Data Management at FZJ " \ + org.opencontainers.image.url="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.documentation="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.source="https://github.com/IQSS/dataverse/tree/develop/modules/container-base" \ + org.opencontainers.image.version="@project.version@" \ + org.opencontainers.image.revision="@git.commit.id.abbrev@" \ + org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.title="Dataverse Base Image" \ + org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" diff --git a/modules/container-base/src/main/docker/assembly.xml b/modules/container-base/src/main/docker/assembly.xml new file mode 100644 index 00000000000..9fc62d49fa1 --- /dev/null +++ b/modules/container-base/src/main/docker/assembly.xml @@ -0,0 +1,17 @@ + + + + + ${project.basedir}/target/payara + appserver + + + + ${project.basedir}/src/main/docker/scripts + scripts + 0755 + + + \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/entrypoint.sh b/modules/container-base/src/main/docker/scripts/entrypoint.sh new file mode 100644 index 00000000000..47933bd42e2 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/entrypoint.sh @@ -0,0 +1,33 @@ +#!/usr/bin/dumb-init /bin/bash +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/ +# server-full/src/main/docker/bin/entrypoint.sh and licensed under CDDL 1.1 by the Payara Foundation. 
+# +########################################################################################################## + +# This shellscript is supposed to be executed by https://github.com/Yelp/dumb-init to keep subprocesses +# and zombies under control. If the ENTRYPOINT command is changed, it will still use dumb-init because shebang. +# dumb-init takes care to send any signals to subshells, too! (Which might run in the background...) + + +# Execute any scripts BEFORE the appserver starts +for f in "${SCRIPT_DIR}"/init_* "${SCRIPT_DIR}"/init.d/*; do + # shellcheck disable=SC1090 + case "$f" in + *.sh) echo "[Entrypoint] running $f"; . "$f" ;; + *) echo "[Entrypoint] ignoring $f" ;; + esac + echo +done + +# If present, run a startInBackground.sh in the background (e.g. to run tasks AFTER the application server starts) +if [ -x "${SCRIPT_DIR}/startInBackground.sh" ]; then + echo "[Entrypoint] running ${SCRIPT_DIR}/startInBackground.sh in background" + "${SCRIPT_DIR}"/startInBackground.sh & +fi + +# Start the application server and make it REPLACE this shell, so init system and Java directly interact +# Remember - this means no code below this statement will be run! +echo "[Entrypoint] running ${SCRIPT_DIR}/startInForeground.sh in foreground" +exec "${SCRIPT_DIR}"/startInForeground.sh "${PAYARA_ARGS}" diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh new file mode 100644 index 00000000000..e2d717af666 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh @@ -0,0 +1,65 @@ +#!/bin/bash +########################################################################################################## +# +# A script to append deploy commands to the post boot command file at +# $PAYARA_HOME/scripts/post-boot-commands.asadmin file. All applications in the +# $DEPLOY_DIR (either files or folders) will be deployed. 
+# The $POSTBOOT_COMMANDS file can then be used with the start-domain using the +# --postbootcommandfile parameter to deploy applications on startup. +# +# Usage: +# ./generate_deploy_commands.sh +# +# Optionally, any number of parameters of the asadmin deploy command can be +# specified as parameters to this script. +# E.g., to deploy applications with implicit CDI scanning disabled: +# +# ./generate_deploy_commands.sh --properties=implicitCdiEnabled=false +# +# Environment variables used: +# - $PREBOOT_COMMANDS - the pre boot command file. +# - $POSTBOOT_COMMANDS - the post boot command file. +# +# Note that many parameters to the deploy command can be safely used only when +# a single application exists in the $DEPLOY_DIR directory. +# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/ +# server-full/src/main/docker/bin/init_1_generate_deploy_commands.sh and licensed under CDDL 1.1 +# by the Payara Foundation. 
+# +########################################################################################################## + +# Check required variables are set +if [ -z "$DEPLOY_DIR" ]; then echo "Variable DEPLOY_DIR is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +deploy() { + + if [ -z "$1" ]; then + echo "No deployment specified"; + exit 1; + fi + + DEPLOY_STATEMENT="deploy $DEPLOY_PROPS $1" + if grep -q "$1" "$POSTBOOT_COMMANDS"; then + echo "post boot commands already deploys $1"; + else + echo "Adding deployment target $1 to post boot commands"; + echo "$DEPLOY_STATEMENT" >> "$POSTBOOT_COMMANDS"; + fi +} + +# RAR files first +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 -name "*.rar" -print0 \ + | while IFS= read -r -d '' file; do deploy "$file"; done + +# Then every other WAR, EAR, JAR or directory (alternatives are grouped so that -print0 applies to all of them, not only to -type d) +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 ! -name "*.rar" \( -name "*.war" -o -name "*.ear" -o -name "*.jar" -o -type d \) -print0 \ + | while IFS= read -r -d '' file; do deploy "$file"; done \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh new file mode 100644 index 00000000000..bb0984332f7 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +set -euo pipefail + +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### +# This script enables different development options, like a JMX connector +# usable with VisualVM, JRebel hot-reload support and JDWP debugger service. +# Enable it by adding env vars on startup (e.g. 
via ConfigMap) +# +# As this script is "sourced" from entrypoint.sh, we can manipulate env vars +# for the parent shell before executing Payara. +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### + +# 0. Init variables +ENABLE_JMX=${ENABLE_JMX:-0} +ENABLE_JDWP=${ENABLE_JDWP:-0} +ENABLE_RELOAD=${ENABLE_RELOAD:-0} + +DV_PREBOOT=${PAYARA_DIR}/dataverse_preboot +echo "# Dataverse preboot configuration for Payara" > "${DV_PREBOOT}" + +# 1. Configure JMX (enabled by default on port 8686, but requires SSL) +# See also https://blog.payara.fish/monitoring-payara-server-with-jconsole +# To still use it, you can use a sidecar container proxying or using JMX via localhost without SSL. +if [ "${ENABLE_JMX}" = "1" ]; then + echo "Enabling unsecured JMX on 0.0.0.0:8686, enabling AMX and tuning monitoring levels to HIGH. You'll need a sidecar for this, as access is allowed from same machine only (without SSL)." + { \ + echo "set configs.config.server-config.amx-configuration.enabled=true" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jvm=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jdbc-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-services-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.ejb-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.thread-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.http-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.security=HIGH" + echo "set 
configs.config.server-config.monitoring-service.module-monitoring-levels.jms-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jersey=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.transaction-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jpa=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.orb=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.deployment=HIGH" + echo "set configs.config.server-config.admin-service.jmx-connector.system.security-enabled=false" + } >> "${DV_PREBOOT}" +fi + +# 2. Enable JDWP via debugging switch +if [ "${ENABLE_JDWP}" = "1" ]; then + echo "Enabling JDWP remote debugging support via asadmin debugging switch." + export PAYARA_ARGS="${PAYARA_ARGS} --debug=true" +fi + +# 3. Enable hot reload +if [ "${ENABLE_RELOAD}" = "1" ]; then + echo "Enabling hot reload of deployments." + echo "set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled=true" >> "${DV_PREBOOT}" +fi + +# 4. 
Add the commands to the existing preboot file, but insert BEFORE deployment +TMP_PREBOOT=$(mktemp) +cat "${DV_PREBOOT}" "${PREBOOT_COMMANDS}" > "${TMP_PREBOOT}" +mv "${TMP_PREBOOT}" "${PREBOOT_COMMANDS}" +echo "DEBUG: preboot contains the following commands:" +echo "--------------------------------------------------" +cat "${PREBOOT_COMMANDS}" +echo "--------------------------------------------------" \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/removeExpiredCaCerts.sh b/modules/container-base/src/main/docker/scripts/removeExpiredCaCerts.sh new file mode 100644 index 00000000000..205a9eda5d7 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/removeExpiredCaCerts.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Remove expired certs from a keystore +# ------------------------------------ +# This script was copied from https://gist.github.com/damkh/a4a0d74891f92b0285a3853418357c1e (thanks @damkh) +# and slightly modified to be used within our scenario and comply with shellcheck good practices. + +set -euo pipefail + +KEYSTORE="${DOMAIN_DIR}/config/cacerts.jks" +keytool -list -v -keystore "${KEYSTORE}" -storepass changeit 2>/dev/null | \ + grep -i 'alias\|until' > aliases.txt + +i=1 +# Split dates and aliases to different arrays +while read -r p; do + # even lines are dates, odd lines are aliases + if ! 
((i % 2)); then + arr_date+=("$p") + else + arr_cn+=("$p") + fi + i=$((i+1)) +done < aliases.txt +i=0 + +# Parse until-dates -> +# convert until-dates to "seconds from 01-01-1970"-format -> +# compare until-dates with today-date -> +# delete expired aliases +for date_idx in $(seq 0 $((${#arr_date[*]}-1))); +do + a_date=$(echo "${arr_date[$date_idx]}" | awk -F"until: " '{print $2}') + if [ "$(date +%s --date="$a_date")" -lt "$(date +%s)" ]; + then + echo "removing ${arr_cn[$i]} expired: $a_date" + alias_name=$(echo "${arr_cn[$i]}" | awk -F"name: " '{print $2}') + keytool -delete -alias "$alias_name" -keystore "${KEYSTORE}" -storepass changeit + fi + i=$((i+1)) +done +echo "Done." \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/startInForeground.sh b/modules/container-base/src/main/docker/scripts/startInForeground.sh new file mode 100644 index 00000000000..4843f6ae055 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/startInForeground.sh @@ -0,0 +1,89 @@ +#!/bin/bash +########################################################################################################## +# +# This script is to execute Payara Server in foreground, mainly in a docker environment. +# It allows to avoid running 2 instances of JVM, which happens with the start-domain --verbose command. +# +# Usage: +# Running +# startInForeground.sh +# is equivalent to running +# asadmin start-domain +# +# It's possible to use any arguments of the start-domain command as arguments to startInForeground.sh +# +# Environment variables used: +# - $ADMIN_USER - the username to use for the asadmin utility. +# - $PASSWORD_FILE - the password file to use for the asadmin utility. +# - $PREBOOT_COMMANDS - the pre boot command file. +# - $POSTBOOT_COMMANDS - the post boot command file. +# - $DOMAIN_NAME - the name of the domain to start. +# - $JVM_ARGS - extra JVM options to pass to the Payara Server instance. 
+# - $AS_ADMIN_MASTERPASSWORD - the master password for the Payara Server instance. +# +# This script executes the asadmin tool which is expected at ~/appserver/bin/asadmin. +# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/ +# extras/docker-images/server-full/src/main/docker/bin/startInForeground.sh and licensed under CDDL 1.1 +# by the Payara Foundation. +# +########################################################################################################## + +# Check required variables are set +if [ -z "$ADMIN_USER" ]; then echo "Variable ADMIN_USER is not set."; exit 1; fi +if [ -z "$PASSWORD_FILE" ]; then echo "Variable PASSWORD_FILE is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$DOMAIN_NAME" ]; then echo "Variable DOMAIN_NAME is not set."; exit 1; fi + +# Check if dumps are enabled - add arg to JVM_ARGS in this case +if [ -n "${ENABLE_DUMPS}" ] && [ "${ENABLE_DUMPS}" = "1" ]; then + JVM_ARGS="${JVM_DUMPS_ARG} ${JVM_ARGS}" +fi + +# The following command gets the command line to be executed by start-domain +# - print the command line to the server with --dry-run, each argument on a separate line +# - remove -read-string argument +# - surround each line except with parenthesis to allow spaces in paths +# - remove lines before and after the command line and squash commands on a single line + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +# shellcheck disable=SC2068 +# -- Using $@ is necessary here as asadmin cannot deal with options enclosed in ""! 
+OUTPUT=$("${PAYARA_DIR}"/bin/asadmin --user="${ADMIN_USER}" --passwordfile="${PASSWORD_FILE}" start-domain --dry-run --prebootcommandfile="${PREBOOT_COMMANDS}" --postbootcommandfile="${POSTBOOT_COMMANDS}" $@ "$DOMAIN_NAME") +STATUS=$? +if [ "$STATUS" -ne 0 ] + then + echo ERROR: "$OUTPUT" >&2 + exit 1 +fi + +COMMAND=$(echo "$OUTPUT"\ + | sed -n -e '2,/^$/p'\ + | sed "s|glassfish.jar|glassfish.jar $JVM_ARGS |g") + +echo Executing Payara Server with the following command line: +echo "$COMMAND" | tr ' ' '\n' +echo + +# Run the server in foreground - read master password from variable or file or use the default "changeit" password + +set +x +if test "$AS_ADMIN_MASTERPASSWORD"x = x -a -f "$PASSWORD_FILE" + then + # shellcheck disable=SC1090 + source "$PASSWORD_FILE" +fi +if test "$AS_ADMIN_MASTERPASSWORD"x = x + then + AS_ADMIN_MASTERPASSWORD=changeit +fi +echo "AS_ADMIN_MASTERPASSWORD=$AS_ADMIN_MASTERPASSWORD" > /tmp/masterpwdfile +# shellcheck disable=SC2086 +# -- Unquoted exec var is necessary, as otherwise things get escaped that may not be escaped (parameters for Java) +exec ${COMMAND} < /tmp/masterpwdfile diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index c1ba693da1b..d85d8aed5a1 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -13,6 +13,7 @@ ../../pom.xml ../../scripts/zipdownload + ../container-base - 5.12.1 + 5.13 11 UTF-8 @@ -147,7 +148,7 @@ 5.2022.3 - 42.5.0 + 42.5.1 8.11.1 1.12.290 0.177.0 @@ -163,7 +164,7 @@ 4.4.14 - 5.0.0-RC1 + 5.0.0 1.15.0 @@ -181,10 +182,14 @@ 3.2.2 3.3.2 3.2.0 + 3.0.0-M1 3.0.0-M5 3.0.0-M5 3.3.0 3.1.2 + + + 0.40.2 @@ -225,6 +230,11 @@ maven-dependency-plugin ${maven-dependency-plugin.version} + + org.apache.maven.plugins + maven-install-plugin + ${maven-install-plugin.version} + org.apache.maven.plugins maven-surefire-plugin @@ -247,6 +257,11 @@ + + io.fabric8 + docker-maven-plugin + ${fabric8-dmp.version} + @@ -299,6 +314,11 @@ true + + unidata-all + Unidata All 
+ https://artifacts.unidata.ucar.edu/repository/unidata-all/ + dvn.private Local repository for hosting jars not available from network repositories. @@ -318,4 +338,44 @@ --> + + + ct + + + 5.2022.4 + + + + + + + io.github.git-commit-id + git-commit-id-maven-plugin + 5.0.0 + + + retrieve-git-details + + revision + + initialize + + + + ${project.basedir}/../../.git + UTC + 8 + false + + + + + + + + diff --git a/pom.xml b/pom.xml index c6459cfc55c..8b6f98c5896 100644 --- a/pom.xml +++ b/pom.xml @@ -25,6 +25,7 @@ 0.8.7 5.2.1 2.4.1 + 5.5.3 org.junit.jupiter diff --git a/scripts/api/data/licenses/licenseCC-BY-4.0.json b/scripts/api/data/licenses/licenseCC-BY-4.0.json index 5596e65e947..59201b8d08e 100644 --- a/scripts/api/data/licenses/licenseCC-BY-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by/4.0", "shortDescription": "Creative Commons Attribution 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 2 } diff --git a/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json b/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json index 8154c9ec5df..c19087664db 100644 --- a/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nc/4.0", "shortDescription": "Creative Commons Attribution-NonCommercial 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nc/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 4 } diff --git a/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json b/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json index 247ce52f6ea..2e374917d28 100644 --- a/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nc-nd/4.0", 
"shortDescription": "Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nc-nd/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 7 } diff --git a/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json b/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json index e9726fb6374..5018884f65e 100644 --- a/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nc-sa/4.0", "shortDescription": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nc-sa/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 3 } diff --git a/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json b/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json index 7ae81bacc10..317d459a7ae 100644 --- a/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nd/4.0", "shortDescription": "Creative Commons Attribution-NoDerivatives 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nd/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 6 } diff --git a/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json b/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json index e9a02880885..0d28c9423aa 100644 --- a/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-sa/4.0", "shortDescription": "Creative Commons Attribution-ShareAlike 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-sa/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 5 } diff --git a/scripts/api/data/licenses/licenseCC0-1.0.json 
b/scripts/api/data/licenses/licenseCC0-1.0.json index 396ba133327..216260a5de8 100644 --- a/scripts/api/data/licenses/licenseCC0-1.0.json +++ b/scripts/api/data/licenses/licenseCC0-1.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/publicdomain/zero/1.0", "shortDescription": "Creative Commons CC0 1.0 Universal Public Domain Dedication.", "iconUrl": "https://licensebuttons.net/p/zero/1.0/88x31.png", - "active": true + "active": true, + "sortOrder": 1 } diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 29d121aae16..be32bb7134e 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -43,7 +43,7 @@ producerURL URL The URL of the producer's website https:// url 39 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation producerLogoURL Logo URL The URL of the producer's logo https:// url 40
FALSE FALSE FALSE FALSE FALSE FALSE producer citation productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 41 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 42 FALSE FALSE FALSE FALSE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 42 TRUE FALSE TRUE TRUE FALSE FALSE citation contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 43 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor contributorType Type Indicates the type of contribution made to the dataset text 44 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation contributorName Name The name of the contributor, e.g. 
the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 45 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation @@ -96,22 +96,23 @@ subject Other D12 13 publicationIDType ark 0 publicationIDType arXiv 1 - publicationIDType bibcode 2 - publicationIDType doi 3 - publicationIDType ean13 4 - publicationIDType eissn 5 - publicationIDType handle 6 - publicationIDType isbn 7 - publicationIDType issn 8 - publicationIDType istc 9 - publicationIDType lissn 10 - publicationIDType lsid 11 - publicationIDType pmid 12 - publicationIDType purl 13 - publicationIDType upc 14 - publicationIDType url 15 - publicationIDType urn 16 - publicationIDType DASH-NRS 17 + publicationIDType bibcode 2 + publicationIDType cstr 3 + publicationIDType doi 4 + publicationIDType ean13 5 + publicationIDType eissn 6 + publicationIDType handle 7 + publicationIDType isbn 8 + publicationIDType issn 9 + publicationIDType istc 10 + publicationIDType lissn 11 + publicationIDType lsid 12 + publicationIDType pmid 13 + publicationIDType purl 14 + publicationIDType upc 15 + publicationIDType url 16 + publicationIDType urn 17 + publicationIDType DASH-NRS 18 contributorType Data Collector 0 contributorType Data Curator 1 contributorType Data Manager 2 diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv new file mode 100644 index 00000000000..a5c50368b75 --- /dev/null +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -0,0 +1,37 @@ +#metadataBlock name dataverseAlias displayName blockURI + codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI + codeVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 
0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 https://www.repostatus.org + codeRepository Code Repository Link to the repository where the un-compiled, human-readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). e.g. https://github.com/user/project url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text 4 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory + programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 5 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage + runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (e.g. Java 11, Python 3.10 or .Net Framework 4.8). e.g. Python 3.10 text 6 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform + operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). text 7 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/operatingSystem + targetProduct Target Product Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. 
text 8 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct + buildInstructions Build Instructions Link to installation instructions/documentation e.g. https://github.com/user/project/blob/main/BUILD.md url 9 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/buildInstructions + softwareRequirementsItem Software Requirements Required software dependencies none 10 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 + softwareRequirements Name & Version Name and version of the required software/library dependency e.g. Pandas 1.4.3 text 0 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://schema.org/softwareRequirements + softwareRequirementsInfoUrl Info URL Link to required software/library homepage or documentation (ideally also versioned) e.g. https://pandas.pydata.org/pandas-docs/version/1.4.3 url 1 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareRequirementsInfoUrl + softwareSuggestionsItem Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none 11 FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + softwareSuggestions Name & Version Name and version of the optional software/library dependency e.g. Sphinx 5.0.2 text 0 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE softwareSuggestionsItem codeMeta20 https://codemeta.github.io/terms/softwareSuggestions + softwareSuggestionsInfoUrl Info URL Link to optional software/library homepage or documentation (ideally also versioned) e.g. https://www.sphinx-doc.org url 1 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE softwareSuggestionsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareSuggestionsInfoUrl + memoryRequirements Memory Requirements Minimum memory requirements. 
text 12 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements + processorRequirements Processor Requirements Processor architecture or other CPU requirements to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Minimum storage requirements (e.g. free space required). text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text 15 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions + softwareHelp Software Help/Documentation Link to help texts or documentation e.g. https://user.github.io/project/docs url 16 #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp + readme Readme Link to the README of the project e.g. https://github.com/user/project/blob/main/README.md url 17 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/readme + releaseNotes Release Notes Link to release notes e.g. https://github.com/user/project/blob/main/docs/release-0.1.md url 18 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes + contIntegration Continuous Integration Link to continuous integration service e.g. https://github.com/user/project/actions url 19 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/contIntegration + issueTracker Issue Tracker Link to software bug reporting or issue tracking system e.g. 
https://github.com/user/project/issues url 20 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/issueTracker +#controlledVocabulary DatasetField Value identifier displayOrder + developmentStatus Concept concept 0 + developmentStatus WIP wip 1 + developmentStatus Active active 2 + developmentStatus Inactive inactive 3 + developmentStatus Unsupported unsupported 4 + developmentStatus Moved moved 5 + developmentStatus Suspended suspended 6 + developmentStatus Abandoned abandoned 7 diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh index 0d79176c099..0d2d60b9538 100755 --- a/scripts/api/setup-datasetfields.sh +++ b/scripts/api/setup-datasetfields.sh @@ -7,4 +7,3 @@ curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @da curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" - diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java index a7a89934f47..344032ef5e3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java @@ -55,7 +55,10 @@ public class AuxiliaryFile implements Serializable { private String formatTag; private String formatVersion; - + + /** + * The application/entity that created the auxiliary file. 
+ */ private String origin; private boolean isPublic; diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 76c91382868..05f3e209632 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -70,9 +70,13 @@ public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) { * @param type how to group the files such as "DP" for "Differentially * @param mediaType user supplied content type (MIME type) * Private Statistics". - * @return success boolean - returns whether the save was successful + * @param save boolean - true to save immediately, false to let the cascade + * do persist to the database. + * @return an AuxiliaryFile with an id when save=true (assuming no + * exceptions) or an AuxiliaryFile without an id that will be persisted + * later through the cascade. */ - public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType, boolean save) { StorageIO storageIO = null; AuxiliaryFile auxFile = new AuxiliaryFile(); @@ -114,7 +118,14 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile auxFile.setType(type); auxFile.setDataFile(dataFile); auxFile.setFileSize(storageIO.getAuxObjectSize(auxExtension)); - auxFile = save(auxFile); + if (save) { + auxFile = save(auxFile); + } else { + if (dataFile.getAuxiliaryFiles() == null) { + dataFile.setAuxiliaryFiles(new ArrayList<>()); + } + dataFile.getAuxiliaryFiles().add(auxFile); + } } catch (IOException ioex) { logger.severe("IO Exception trying to save auxiliary file: " 
+ ioex.getMessage()); throw new InternalServerErrorException(); @@ -129,7 +140,11 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile } return auxFile; } - + + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { + return processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType, true); + } + public AuxiliaryFile lookupAuxiliaryFile(DataFile dataFile, String formatTag, String formatVersion) { Query query = em.createNamedQuery("AuxiliaryFile.lookupAuxiliaryFile"); diff --git a/src/main/java/edu/harvard/iq/dataverse/DashboardPage.java b/src/main/java/edu/harvard/iq/dataverse/DashboardPage.java index 5b6cdd23775..99c7951c96e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DashboardPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DashboardPage.java @@ -97,12 +97,8 @@ public int getNumberOfConfiguredHarvestClients() { } public long getNumberOfHarvestedDatasets() { - List configuredHarvestingClients = harvestingClientService.getAllHarvestingClients(); - if (configuredHarvestingClients == null || configuredHarvestingClients.isEmpty()) { - return 0L; - } - Long numOfDatasets = harvestingClientService.getNumberOfHarvestedDatasetByClients(configuredHarvestingClients); + Long numOfDatasets = harvestingClientService.getNumberOfHarvestedDatasetsByAllClients(); if (numOfDatasets != null && numOfDatasets > 0L) { return numOfDatasets; @@ -142,7 +138,7 @@ public String getHarvestClientsInfoLabel() { infoLabel = configuredHarvestingClients.size() + " harvesting clients configured; "; } - Long numOfDatasets = harvestingClientService.getNumberOfHarvestedDatasetByClients(configuredHarvestingClients); + Long numOfDatasets = harvestingClientService.getNumberOfHarvestedDatasetsByAllClients(); if (numOfDatasets != null && numOfDatasets > 0L) { return 
infoLabel + numOfDatasets + " harvested datasets"; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index cb43dff0e20..5171e8d49f2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -569,7 +569,7 @@ public FileMetadata getLatestPublishedFileMetadata() throws UnsupportedOperation if(fmd == null) { throw new UnsupportedOperationException("No published metadata version for DataFile " + this.getId()); } - + return fmd; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 0b935183182..7da06f36be4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1544,6 +1544,10 @@ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws I throw new IOException("Attempted to permanently delete a physical file still associated with an existing DvObject " + "(id: " + dataFileId + ", location: " + storageLocation); } + if(storageLocation == null || storageLocation.isBlank()) { + throw new IOException("Attempted to delete a physical file with no location " + + "(id: " + dataFileId + ", location: " + storageLocation); + } StorageIO directStorageAccess = DataAccess.getDirectStorageIO(storageLocation); directStorageAccess.delete(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index a4f82d41bac..d7e7271738d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -33,6 +33,8 @@ import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; + +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.StringUtil; 
import edu.harvard.iq.dataverse.util.SystemConfig; @@ -391,19 +393,21 @@ private DatasetVersion createNewDatasetVersion(Template template, FileMetadata f /** * The "edit version" is the most recent *draft* of a dataset, and if the - * latest version of a dataset is published, a new draft will be created. - * + * latest version of a dataset is published, a new draft will be created. If + * you don't want to create a new version, you should be using + * getLatestVersion. + * * @return The edit version {@code this}. */ - public DatasetVersion getEditVersion() { - return getEditVersion(null, null); + public DatasetVersion getOrCreateEditVersion() { + return getOrCreateEditVersion(null, null); } - public DatasetVersion getEditVersion(FileMetadata fm) { - return getEditVersion(null, fm); + public DatasetVersion getOrCreateEditVersion(FileMetadata fm) { + return getOrCreateEditVersion(null, fm); } - public DatasetVersion getEditVersion(Template template, FileMetadata fm) { + public DatasetVersion getOrCreateEditVersion(Template template, FileMetadata fm) { DatasetVersion latestVersion = this.getLatestVersion(); if (!latestVersion.isWorkingCopy() || template != null) { // if the latest version is released or archived, create a new version for editing @@ -528,11 +532,8 @@ private Collection getCategoryNames() { @Deprecated public Path getFileSystemDirectory() { Path studyDir = null; - - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); if (this.getAlternativePersistentIndentifiers() != null && !this.getAlternativePersistentIndentifiers().isEmpty()) { for (AlternativePersistentIdentifier api : this.getAlternativePersistentIndentifiers()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 
0a8db69bf5b..429a0d7a4e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -56,6 +56,8 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.validation.URLValidator; import edu.harvard.iq.dataverse.workflows.WorkflowComment; @@ -1845,7 +1847,9 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } - + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } private String init(boolean initFull) { @@ -2067,7 +2071,7 @@ private String init(boolean initFull) { } //Initalize with the default if there is one dataset.setTemplate(selectedTemplate); - workingVersion = dataset.getEditVersion(selectedTemplate, null); + workingVersion = dataset.getOrCreateEditVersion(selectedTemplate, null); updateDatasetFieldInputLevels(); } else { workingVersion = dataset.getCreateVersion(licenseServiceBean.getDefault()); @@ -2401,7 +2405,7 @@ private void resetVersionUI() { AuthenticatedUser au = (AuthenticatedUser) session.getUser(); //On create set pre-populated fields - for (DatasetField dsf : dataset.getEditVersion().getDatasetFields()) { + for (DatasetField dsf : dataset.getOrCreateEditVersion().getDatasetFields()) { if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.depositor) && dsf.isEmpty()) { dsf.getDatasetFieldValues().get(0).setValue(au.getLastName() + ", " + au.getFirstName()); } @@ -2458,7 +2462,7 @@ private void refreshSelectedFiles(List filesToRefresh){ } String termsOfAccess = workingVersion.getTermsOfUseAndAccess().getTermsOfAccess(); boolean requestAccess = 
workingVersion.getTermsOfUseAndAccess().isFileAccessRequest(); - workingVersion = dataset.getEditVersion(); + workingVersion = dataset.getOrCreateEditVersion(); workingVersion.getTermsOfUseAndAccess().setTermsOfAccess(termsOfAccess); workingVersion.getTermsOfUseAndAccess().setFileAccessRequest(requestAccess); List newSelectedFiles = new ArrayList<>(); @@ -2521,7 +2525,7 @@ public void edit(EditMode editMode) { if (this.readOnly) { dataset = datasetService.find(dataset.getId()); } - workingVersion = dataset.getEditVersion(); + workingVersion = dataset.getOrCreateEditVersion(); clone = workingVersion.cloneDatasetVersion(); if (editMode.equals(EditMode.METADATA)) { datasetVersionUI = datasetVersionUI.initDatasetVersionUI(workingVersion, true); @@ -3452,7 +3456,7 @@ private void deleteFiles(List filesToDelete) { if (markedForDelete.getId() != null) { // This FileMetadata has an id, i.e., it exists in the database. // We are going to remove this filemetadata from the version: - dataset.getEditVersion().getFileMetadatas().remove(markedForDelete); + dataset.getOrCreateEditVersion().getFileMetadatas().remove(markedForDelete); // But the actual delete will be handled inside the UpdateDatasetCommand // (called later on). The list "filesToBeDeleted" is passed to the // command as a parameter: @@ -3678,7 +3682,7 @@ public String save() { // have been created in the dataset. 
dataset = datasetService.find(dataset.getId()); - List filesAdded = ingestService.saveAndAddFilesToDataset(dataset.getEditVersion(), newFiles, null, true); + List filesAdded = ingestService.saveAndAddFilesToDataset(dataset.getOrCreateEditVersion(), newFiles, null, true); newFiles.clear(); // and another update command: @@ -5490,7 +5494,7 @@ public List getCachedToolsForDataFile(Long fileId, ExternalTool.Ty return cachedTools; } DataFile dataFile = datafileService.find(fileId); - cachedTools = ExternalToolServiceBean.findExternalToolsByFile(externalTools, dataFile); + cachedTools = externalToolService.findExternalToolsByFile(externalTools, dataFile); cachedToolsByFileId.put(fileId, cachedTools); //add to map so we don't have to do the lifting again return cachedTools; } @@ -6062,4 +6066,19 @@ public void startGlobusTransfer() { } PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken)); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + ApiToken apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (session timeout? bug?) 
+ logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..c21861a1bf4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.util.MarkupChecker; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import edu.harvard.iq.dataverse.branding.BrandingUtil; @@ -842,12 +843,26 @@ public String getDescriptionPlainText() { return MarkupChecker.stripAllTags(getDescription()); } - public List getDescriptionsPlainText() { - List plainTextDescriptions = new ArrayList<>(); + /* This method is (only) used in creating schema.org json-ld where Google requires a text description <5000 chars. + * + * @returns - a single string composed of all descriptions (joined with \n if more than one) truncated with a trailing '...' 
if >=5000 chars + */ + public String getDescriptionsPlainTextTruncated() { + List plainTextDescriptions = new ArrayList(); + for (String htmlDescription : getDescriptions()) { plainTextDescriptions.add(MarkupChecker.stripAllTags(htmlDescription)); } - return plainTextDescriptions; + String description = String.join("\n", plainTextDescriptions); + if (description.length() >= 5000) { + int endIndex = description.substring(0, 4997).lastIndexOf(" "); + if (endIndex == -1) { + //There are no spaces so just break anyway + endIndex = 4997; + } + description = description.substring(0, endIndex) + "..."; + } + return description; } /** @@ -1802,27 +1817,46 @@ public String getJsonLd() { for (DatasetAuthor datasetAuthor : this.getDatasetAuthors()) { JsonObjectBuilder author = Json.createObjectBuilder(); String name = datasetAuthor.getName().getDisplayValue(); + String identifierAsUrl = datasetAuthor.getIdentifierAsUrl(); DatasetField authorAffiliation = datasetAuthor.getAffiliation(); String affiliation = null; if (authorAffiliation != null) { - affiliation = datasetAuthor.getAffiliation().getDisplayValue(); - } - // We are aware of "givenName" and "familyName" but instead of a person it might be an organization such as "Gallup Organization". - //author.add("@type", "Person"); - author.add("name", name); - // We are aware that the following error is thrown by https://search.google.com/structured-data/testing-tool - // "The property affiliation is not recognized by Google for an object of type Thing." - // Someone at Google has said this is ok. - // This logic could be moved into the `if (authorAffiliation != null)` block above. - if (!StringUtil.isEmpty(affiliation)) { - author.add("affiliation", affiliation); + affiliation = datasetAuthor.getAffiliation().getValue(); } - String identifierAsUrl = datasetAuthor.getIdentifierAsUrl(); - if (identifierAsUrl != null) { - // It would be valid to provide an array of identifiers for authors but we have decided to only provide one. 
- author.add("@id", identifierAsUrl); - author.add("identifier", identifierAsUrl); + JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, false, (identifierAsUrl!=null)); + String givenName= entity.containsKey("givenName") ? entity.getString("givenName"):null; + String familyName= entity.containsKey("familyName") ? entity.getString("familyName"):null; + + if (entity.getBoolean("isPerson")) { + // Person + author.add("@type", "Person"); + if (givenName != null) { + author.add("givenName", givenName); + } + if (familyName != null) { + author.add("familyName", familyName); + } + if (!StringUtil.isEmpty(affiliation)) { + author.add("affiliation", Json.createObjectBuilder().add("@type", "Organization").add("name", affiliation)); + } + //Currently all possible identifier URLs are for people not Organizations + if(identifierAsUrl != null) { + author.add("sameAs", identifierAsUrl); + //Legacy - not sure if these are still useful + author.add("@id", identifierAsUrl); + author.add("identifier", identifierAsUrl); + + } + } else { + // Organization + author.add("@type", "Organization"); + if (!StringUtil.isEmpty(affiliation)) { + author.add("parentOrganization", Json.createObjectBuilder().add("@type", "Organization").add("name", affiliation)); + } } + // Both cases + author.add("name", entity.getString("fullName")); + //And add to the array authors.add(author); } JsonArray authorsArray = authors.build(); @@ -1859,16 +1893,8 @@ public String getJsonLd() { job.add("dateModified", this.getPublicationDateAsString()); job.add("version", this.getVersionNumber().toString()); - JsonArrayBuilder descriptionsArray = Json.createArrayBuilder(); - List descriptions = this.getDescriptionsPlainText(); - for (String description : descriptions) { - descriptionsArray.add(description); - } - /** - * In Dataverse 4.8.4 "description" was a single string but now it's an - * array. 
- */ - job.add("description", descriptionsArray); + String description = this.getDescriptionsPlainTextTruncated(); + job.add("description", description); /** * "keywords" - contains subject(s), datasetkeyword(s) and topicclassification(s) @@ -1892,11 +1918,16 @@ public String getJsonLd() { job.add("keywords", keywords); /** - * citation: (multiple) related publication citation and URLs, if - * present. + * citation: (multiple) related publication citation and URLs, if present. * - * In Dataverse 4.8.4 "citation" was an array of strings but now it's an - * array of objects. + * Schema.org allows text or a CreativeWork object. Google recommends text with + * either the full citation or the PID URL. This code adds an object if we have + * the citation text for the work and/or an entry in the URL field (i.e. + * https://doi.org/...) The URL is reported as the 'url' field while the + * citation text (which would normally include the name) is reported as 'name' + * since there doesn't appear to be a better field ('text', which was used + * previously, is the actual text of the creative work). 
+ * */ List relatedPublications = getRelatedPublications(); if (!relatedPublications.isEmpty()) { @@ -1911,11 +1942,12 @@ public String getJsonLd() { JsonObjectBuilder citationEntry = Json.createObjectBuilder(); citationEntry.add("@type", "CreativeWork"); if (pubCitation != null) { - citationEntry.add("text", pubCitation); + citationEntry.add("name", pubCitation); } if (pubUrl != null) { citationEntry.add("@id", pubUrl); citationEntry.add("identifier", pubUrl); + citationEntry.add("url", pubUrl); } if (addToArray) { jsonArrayBuilder.add(citationEntry); @@ -1957,13 +1989,14 @@ public String getJsonLd() { job.add("license",DatasetUtil.getLicenseURI(this)); } + String installationBrandName = BrandingUtil.getInstallationBrandName(); + job.add("includedInDataCatalog", Json.createObjectBuilder() .add("@type", "DataCatalog") - .add("name", BrandingUtil.getRootDataverseCollectionName()) + .add("name", installationBrandName) .add("url", SystemConfig.getDataverseSiteUrlStatic()) ); - - String installationBrandName = BrandingUtil.getInstallationBrandName(); + /** * Both "publisher" and "provider" are included but they have the same * values. Some services seem to prefer one over the other. 
@@ -2012,7 +2045,7 @@ public String getJsonLd() { } fileObject.add("@type", "DataDownload"); fileObject.add("name", fileMetadata.getLabel()); - fileObject.add("fileFormat", fileMetadata.getDataFile().getContentType()); + fileObject.add("encodingFormat", fileMetadata.getDataFile().getContentType()); fileObject.add("contentSize", fileMetadata.getDataFile().getFilesize()); fileObject.add("description", fileMetadata.getDescription()); fileObject.add("@id", filePidUrlAsString); @@ -2021,10 +2054,8 @@ public String getJsonLd() { if (hideFilesBoolean != null && hideFilesBoolean.equals("true")) { // no-op } else { - if (FileUtil.isPubliclyDownloadable(fileMetadata)) { - String nullDownloadType = null; - fileObject.add("contentUrl", dataverseSiteUrl + FileUtil.getFileDownloadUrlPath(nullDownloadType, fileMetadata.getDataFile().getId(), false, fileMetadata.getId())); - } + String nullDownloadType = null; + fileObject.add("contentUrl", dataverseSiteUrl + FileUtil.getFileDownloadUrlPath(nullDownloadType, fileMetadata.getDataFile().getId(), false, fileMetadata.getId())); } fileArray.add(fileObject); } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 6cf294ffd6d..1c033b37872 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -5,7 +5,9 @@ import edu.harvard.iq.dataverse.api.AbstractApiBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; 
@@ -31,11 +33,14 @@ import edu.harvard.iq.dataverse.ingest.IngestUtil; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -539,7 +544,7 @@ public String init() { return permissionsWrapper.notFound(); } - workingVersion = dataset.getEditVersion(); + workingVersion = dataset.getOrCreateEditVersion(); //TODO: review if we we need this check; // as getEditVersion should either return the exisiting draft or create a new one @@ -586,8 +591,7 @@ public String init() { datafileService, permissionService, commandEngine, - systemConfig, - licenseServiceBean); + systemConfig); fileReplacePageHelper = new FileReplacePageHelper(addReplaceFileHelper, dataset, @@ -890,7 +894,7 @@ private void deleteFiles(List filesForDelete) { // ToDo - FileMetadataUtil.removeFileMetadataFromList should handle these two // removes so they could be put after this if clause and the else clause could // be removed. - dataset.getEditVersion().getFileMetadatas().remove(markedForDelete); + dataset.getOrCreateEditVersion().getFileMetadatas().remove(markedForDelete); fileMetadatas.remove(markedForDelete); filesToBeDeleted.add(markedForDelete); @@ -907,7 +911,7 @@ private void deleteFiles(List filesForDelete) { // 1. delete the filemetadata from the local display list: FileMetadataUtil.removeFileMetadataFromList(fileMetadatas, markedForDelete); // 2. 
delete the filemetadata from the version: - FileMetadataUtil.removeFileMetadataFromList(dataset.getEditVersion().getFileMetadatas(), markedForDelete); + FileMetadataUtil.removeFileMetadataFromList(dataset.getOrCreateEditVersion().getFileMetadatas(), markedForDelete); } if (markedForDelete.getDataFile().getId() == null) { @@ -1201,7 +1205,7 @@ public String save() { */ } - workingVersion = dataset.getEditVersion(); + workingVersion = dataset.getOrCreateEditVersion(); logger.fine("working version id: " + workingVersion.getId()); if (FileEditMode.EDIT == mode && Referrer.FILE == referrer) { @@ -2425,10 +2429,8 @@ public boolean isTemporaryPreviewAvailable(String fileSystemId, String mimeType) return false; } - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.isEmpty()) { - filesRootDirectory = "/tmp/files"; - } + // Retrieve via MPCONFIG. Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String fileSystemName = filesRootDirectory + "/temp/" + fileSystemId; @@ -3067,6 +3069,10 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } + + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } private void populateFileMetadatas() { fileMetadatas = new ArrayList<>(); @@ -3106,4 +3112,18 @@ public void setFileAccessRequest(boolean fileAccessRequest) { public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + User user = session.getUser(); + if (user 
instanceof AuthenticatedUser) { + ApiToken apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (seesion timeout? bug?) + logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java index 6262b6204f4..fc31d0867ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java @@ -237,7 +237,6 @@ public List getCategoriesByName() { return ret; } - public JsonArrayBuilder getCategoryNamesAsJsonArrayBuilder() { JsonArrayBuilder builder = Json.createArrayBuilder(); diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index 7f2c6dfca5c..228db0a7584 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -39,6 +39,7 @@ import edu.harvard.iq.dataverse.util.JsfHelper; import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.IOException; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -57,6 +58,9 @@ import javax.faces.view.ViewScoped; import javax.inject.Inject; import javax.inject.Named; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonValue; import javax.validation.ConstraintViolation; import org.primefaces.PrimeFaces; @@ -125,6 +129,8 @@ public class FilePage implements java.io.Serializable { ExternalToolServiceBean externalToolService; @EJB PrivateUrlServiceBean privateUrlService; + @EJB + AuxiliaryFileServiceBean 
auxiliaryFileService; @Inject DataverseRequestServiceBean dvRequestService; @@ -285,8 +291,15 @@ public void setDatasetVersionId(Long datasetVersionId) { this.datasetVersionId = datasetVersionId; } + // findPreviewTools would be a better name private List sortExternalTools(){ - List retList = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.PREVIEW, file.getContentType()); + List retList = new ArrayList<>(); + List previewTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.PREVIEW, file.getContentType()); + for (ExternalTool previewTool : previewTools) { + if (externalToolService.meetsRequirements(previewTool, file)) { + retList.add(previewTool); + } + } Collections.sort(retList, CompareExternalToolName); return retList; } @@ -365,7 +378,7 @@ public String saveProvFreeform(String freeformTextInput, DataFile dataFileFromPo file.setProvEntityName(dataFileFromPopup.getProvEntityName()); //passing this value into the file being saved here is pretty hacky. 
Command cmd; - for (FileMetadata fmw : editDataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmw : editDataset.getOrCreateEditVersion().getFileMetadatas()) { if (fmw.getDataFile().equals(this.fileMetadata.getDataFile())) { cmd = new PersistProvFreeFormCommand(dvRequestService.getDataverseRequest(), file, freeformTextInput); commandEngine.submit(cmd); @@ -381,15 +394,15 @@ public String restrictFile(boolean restricted) throws CommandException{ String fileNames = null; editDataset = this.file.getOwner(); if (restricted) { // get values from access popup - editDataset.getEditVersion().getTermsOfUseAndAccess().setTermsOfAccess(termsOfAccess); - editDataset.getEditVersion().getTermsOfUseAndAccess().setFileAccessRequest(fileAccessRequest); + editDataset.getOrCreateEditVersion().getTermsOfUseAndAccess().setTermsOfAccess(termsOfAccess); + editDataset.getOrCreateEditVersion().getTermsOfUseAndAccess().setFileAccessRequest(fileAccessRequest); } //using this method to update the terms for datasets that are out of compliance // with Terms of Access requirement - may get her with a file that is already restricted // we'll allow it try { Command cmd; - for (FileMetadata fmw : editDataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmw : editDataset.getOrCreateEditVersion().getFileMetadatas()) { if (fmw.getDataFile().equals(this.fileMetadata.getDataFile())) { fileNames += fmw.getLabel(); cmd = new RestrictFileCommand(fmw.getDataFile(), dvRequestService.getDataverseRequest(), restricted); @@ -424,7 +437,7 @@ public String deleteFile() { FileMetadata markedForDelete = null; - for (FileMetadata fmd : editDataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmd : editDataset.getOrCreateEditVersion().getFileMetadatas()) { if (fmd.getDataFile().getId().equals(fileId)) { markedForDelete = fmd; @@ -435,17 +448,17 @@ public String deleteFile() { // the file already exists as part of this dataset // so all we remove is the file from the 
fileMetadatas (for display) // and let the delete be handled in the command (by adding it to the filesToBeDeleted list - editDataset.getEditVersion().getFileMetadatas().remove(markedForDelete); + editDataset.getOrCreateEditVersion().getFileMetadatas().remove(markedForDelete); filesToBeDeleted.add(markedForDelete); } else { List filesToKeep = new ArrayList<>(); - for (FileMetadata fmo : editDataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmo : editDataset.getOrCreateEditVersion().getFileMetadatas()) { if (!fmo.getDataFile().getId().equals(this.getFile().getId())) { filesToKeep.add(fmo); } } - editDataset.getEditVersion().setFileMetadatas(filesToKeep); + editDataset.getOrCreateEditVersion().setFileMetadatas(filesToKeep); } fileDeleteInProgress = true; @@ -612,7 +625,7 @@ public void setTermsMet(boolean termsMet) { public String save() { // Validate - Set constraintViolations = editDataset.getEditVersion().validate(); + Set constraintViolations = editDataset.getOrCreateEditVersion().validate(); if (!constraintViolations.isEmpty()) { //JsfHelper.addFlashMessage(JH.localize("dataset.message.validationError")); fileDeleteInProgress = false; @@ -629,7 +642,7 @@ public String save() { if (!filesToBeDeleted.isEmpty()) { // We want to delete the file (there's always only one file with this page) - editDataset.getEditVersion().getFileMetadatas().remove(filesToBeDeleted.get(0)); + editDataset.getOrCreateEditVersion().getFileMetadatas().remove(filesToBeDeleted.get(0)); deleteFileId = filesToBeDeleted.get(0).getDataFile().getId(); deleteStorageLocation = datafileService.getPhysicalFileToDelete(filesToBeDeleted.get(0).getDataFile()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java index 1a8ee8a85e8..df16991b51e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java @@ 
-24,8 +24,6 @@ import java.io.File; import java.io.FileInputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; @@ -34,6 +32,7 @@ import java.security.PrivateKey; /* Handlenet imports: */ +import edu.harvard.iq.dataverse.util.SystemConfig; import net.handle.hdllib.AbstractMessage; import net.handle.hdllib.AbstractResponse; import net.handle.hdllib.AdminRecord; @@ -247,21 +246,7 @@ private String getRegistrationUrl(DvObject dvObject) { } public String getSiteUrl() { - logger.log(Level.FINE,"getSiteUrl"); - String hostUrl = System.getProperty("dataverse.siteUrl"); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty("dataverse.fqdn"); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - hostUrl = "https://" + hostName; - return hostUrl; + return SystemConfig.getDataverseSiteUrlStatic(); } private byte[] readKey(final String file) { diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java index bc83c15dcd7..5be7578f7f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java @@ -9,7 +9,6 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteHarvestingClientCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; @@ -24,7 +23,6 
@@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Locale; import java.util.Collections; import java.util.logging.Level; import java.util.logging.Logger; @@ -79,7 +77,7 @@ public class HarvestingClientsPage implements java.io.Serializable { private Dataverse dataverse; private Long dataverseId = null; private HarvestingClient selectedClient; - private boolean setListTruncated = false; + private boolean setListTruncated = false; //private static final String solrDocIdentifierDataset = "dataset_"; @@ -245,6 +243,7 @@ public void editClient(HarvestingClient harvestingClient) { this.newNickname = harvestingClient.getName(); this.newHarvestingUrl = harvestingClient.getHarvestingUrl(); + this.customHeader = harvestingClient.getCustomHttpHeaders(); this.initialSettingsValidated = false; // TODO: do we want to try and contact the server, again, to make @@ -340,6 +339,7 @@ public void createClient(ActionEvent ae) { getSelectedDestinationDataverse().getHarvestingClientConfigs().add(newHarvestingClient); newHarvestingClient.setHarvestingUrl(newHarvestingUrl); + newHarvestingClient.setCustomHttpHeaders(customHeader); if (!StringUtils.isEmpty(newOaiSet)) { newHarvestingClient.setHarvestingSet(newOaiSet); } @@ -426,6 +426,7 @@ public void saveClient(ActionEvent ae) { // nickname is not editable for existing clients: //harvestingClient.setName(newNickname); harvestingClient.setHarvestingUrl(newHarvestingUrl); + harvestingClient.setCustomHttpHeaders(customHeader); harvestingClient.setHarvestingSet(newOaiSet); harvestingClient.setMetadataPrefix(newMetadataFormat); harvestingClient.setHarvestStyle(newHarvestingStyle); @@ -554,6 +555,9 @@ public boolean validateServerUrlOAI() { if (!StringUtils.isEmpty(getNewHarvestingUrl())) { OaiHandler oaiHandler = new OaiHandler(getNewHarvestingUrl()); + if (getNewCustomHeader() != null) { + oaiHandler.setCustomHeaders(oaiHandler.makeCustomHeaders(getNewCustomHeader())); + } boolean success = true; 
String message = null; @@ -635,6 +639,23 @@ public boolean validateServerUrlOAI() { return false; } + public boolean validateCustomHeader() { + if (!StringUtils.isEmpty(getNewCustomHeader())) { + // TODO: put this method somewhere else as a static utility + + // check that it's looking like "{header-name}: {header value}" at least + if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getNewCustomHeader())) { + FacesContext.getCurrentInstance().addMessage(getNewClientCustomHeaderInputField().getClientId(), + new FacesMessage(FacesMessage.SEVERITY_ERROR, "", BundleUtil.getStringFromBundle("harvestclients.newClientDialog.customHeader.invalid"))); + + return false; + } + } + + // this setting is optional + return true; + } + public void validateInitialSettings() { if (isHarvestTypeOAI()) { boolean nicknameValidated = true; @@ -644,9 +665,10 @@ public void validateInitialSettings() { destinationDataverseValidated = validateSelectedDataverse(); } boolean urlValidated = validateServerUrlOAI(); + boolean customHeaderValidated = validateCustomHeader(); - if (nicknameValidated && destinationDataverseValidated && urlValidated) { - // In Create mode we want to run all 3 validation tests; this is why + if (nicknameValidated && destinationDataverseValidated && urlValidated && customHeaderValidated) { + // In Create mode we want to run all 4 validation tests; this is why // we are not doing "if ((validateNickname() && validateServerUrlOAI())" // in the line above. -- L.A. 4.4 May 2016. 
@@ -688,6 +710,7 @@ public void backToStepThree() { UIInput newClientNicknameInputField; UIInput newClientUrlInputField; + UIInput newClientCustomHeaderInputField; UIInput hiddenInputField; /*UISelectOne*/ UIInput metadataFormatMenu; UIInput remoteArchiveStyleMenu; @@ -695,6 +718,7 @@ public void backToStepThree() { private String newNickname = ""; private String newHarvestingUrl = ""; + private String customHeader = null; private boolean initialSettingsValidated = false; private String newOaiSet = ""; private String newMetadataFormat = ""; @@ -718,6 +742,7 @@ public void initNewClient(ActionEvent ae) { //this.selectedClient = new HarvestingClient(); this.newNickname = ""; this.newHarvestingUrl = ""; + this.customHeader = null; this.initialSettingsValidated = false; this.newOaiSet = ""; this.newMetadataFormat = ""; @@ -762,6 +787,14 @@ public void setNewHarvestingUrl(String newHarvestingUrl) { this.newHarvestingUrl = newHarvestingUrl; } + public String getNewCustomHeader() { + return customHeader; + } + + public void setNewCustomHeader(String customHeader) { + this.customHeader = customHeader; + } + public int getHarvestTypeRadio() { return this.harvestTypeRadio; } @@ -871,6 +904,14 @@ public void setNewClientUrlInputField(UIInput newClientInputField) { this.newClientUrlInputField = newClientInputField; } + public UIInput getNewClientCustomHeaderInputField() { + return newClientCustomHeaderInputField; + } + + public void setNewClientCustomHeaderInputField(UIInput newClientInputField) { + this.newClientCustomHeaderInputField = newClientInputField; + } + public UIInput getHiddenInputField() { return hiddenInputField; } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index aa40423000d..bf36f265743 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -107,6 +107,8 @@ public class SettingsWrapper 
implements java.io.Serializable { private Boolean rsyncOnly = null; + private Boolean webloaderUpload = null; + private String metricsUrl = null; private Boolean dataFilePIDSequentialDependent = null; @@ -338,6 +340,13 @@ public String getGlobusAppUrl() { } + public boolean isWebloaderUpload() { + if (webloaderUpload == null) { + webloaderUpload = systemConfig.isWebloaderUpload(); + } + return webloaderUpload; + } + public boolean isRsyncOnly() { if (rsyncOnly == null) { String downloadMethods = getValueForKey(SettingsServiceBean.Key.DownloadMethods); diff --git a/src/main/java/edu/harvard/iq/dataverse/Template.java b/src/main/java/edu/harvard/iq/dataverse/Template.java index 61f0a78656f..7798367b4d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Template.java +++ b/src/main/java/edu/harvard/iq/dataverse/Template.java @@ -9,6 +9,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.TreeMap; import java.util.stream.Collectors; import javax.json.Json; @@ -139,9 +140,9 @@ public List getDatasetFields() { private Map instructionsMap = null; @Transient - private Map> metadataBlocksForView = new HashMap<>(); + private TreeMap> metadataBlocksForView = new TreeMap<>(); @Transient - private Map> metadataBlocksForEdit = new HashMap<>(); + private TreeMap> metadataBlocksForEdit = new TreeMap<>(); @Transient private boolean isDefaultForDataverse; @@ -166,19 +167,19 @@ public void setDataversesHasAsDefault(List dataversesHasAsDefault) { } - public Map> getMetadataBlocksForView() { + public TreeMap> getMetadataBlocksForView() { return metadataBlocksForView; } - public void setMetadataBlocksForView(Map> metadataBlocksForView) { + public void setMetadataBlocksForView(TreeMap> metadataBlocksForView) { this.metadataBlocksForView = metadataBlocksForView; } - public Map> getMetadataBlocksForEdit() { + public TreeMap> getMetadataBlocksForEdit() { return metadataBlocksForEdit; } - public void setMetadataBlocksForEdit(Map> 
metadataBlocksForEdit) { + public void setMetadataBlocksForEdit(TreeMap> metadataBlocksForEdit) { this.metadataBlocksForEdit = metadataBlocksForEdit; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index ed9a544e726..51f6f05f326 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -46,9 +46,11 @@ import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; @@ -362,7 +364,7 @@ protected AuthenticatedUser findUserByApiToken( String apiKey ) { protected User findUserOrDie() throws WrappedResponse { final String requestApiKey = getRequestApiKey(); final String requestWFKey = getRequestWorkflowInvocationID(); - if (requestApiKey == null && requestWFKey == null) { + if (requestApiKey == null && requestWFKey == null && getRequestParameter(UrlSignerUtil.SIGNED_URL_TOKEN)==null) { return GuestUser.get(); } PrivateUrlUser privateUrlUser = privateUrlSvc.getPrivateUrlUserFromToken(requestApiKey); @@ -419,10 +421,36 @@ private AuthenticatedUser findAuthenticatedUserOrDie( String key, String wfid ) } else { throw new WrappedResponse(badWFKey(wfid)); } + } else if (getRequestParameter(UrlSignerUtil.SIGNED_URL_TOKEN) != null) { + AuthenticatedUser authUser = getAuthenticatedUserFromSignedUrl(); + 
if (authUser != null) { + return authUser; + } } //Just send info about the apiKey - workflow users will learn about invocationId elsewhere throw new WrappedResponse(badApiKey(null)); } + + private AuthenticatedUser getAuthenticatedUserFromSignedUrl() { + AuthenticatedUser authUser = null; + // The signedUrl contains a param telling which user this is supposed to be for. + // We don't trust this. So we lookup that user, and get their API key, and use + // that as a secret in validating the signedURL. If the signature can't be + // validated with their key, the user (or their API key) has been changed and + // we reject the request. + // ToDo - add null checks/ verify that calling methods catch things. + String user = httpRequest.getParameter("user"); + AuthenticatedUser targetUser = authSvc.getAuthenticatedUser(user); + String key = JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + + authSvc.findApiTokenByUser(targetUser).getTokenString(); + String signedUrl = httpRequest.getRequestURL().toString() + "?" 
+ httpRequest.getQueryString(); + String method = httpRequest.getMethod(); + boolean validated = UrlSignerUtil.isValidUrl(signedUrl, user, method, key); + if (validated) { + authUser = targetUser; + } + return authUser; + } protected Dataverse findDataverseOrDie( String dvIdtf ) throws WrappedResponse { Dataverse dv = findDataverse(dvIdtf); @@ -477,6 +505,7 @@ protected Dataset findDatasetOrDie(String id) throws WrappedResponse { } protected DataFile findDataFileOrDie(String id) throws WrappedResponse { + DataFile datafile; if (id.equals(PERSISTENT_ID_KEY)) { String persistentId = getRequestParameter(PERSISTENT_ID_KEY.substring(1)); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index abeedf23b59..3bd0a19672b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -187,9 +187,6 @@ public class Access extends AbstractApiBean { @Inject MakeDataCountLoggingServiceBean mdcLogService; - - private static final String API_KEY_HEADER = "X-Dataverse-key"; - //@EJB // TODO: @@ -197,26 +194,23 @@ public class Access extends AbstractApiBean { @Path("datafile/bundle/{fileId}") @GET @Produces({"application/zip"}) - public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, 
ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { GuestbookResponse gbr = null; DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); + checkAuthorization(df); if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + //This calls findUserOrDie which will retrieve the key param or api token header, or the workflow token header. + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); guestbookResponseService.save(gbr); - MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); + MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); mdcLogService.logEntry(entry); } @@ -278,7 +272,7 @@ private DataFile findDataFileOrDieWrapper(String fileId){ @Path("datafile/{fileId:.+}") @GET @Produces({"application/xml"}) - public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { // check first if there's a trailing slash, and chop it: while (fileId.lastIndexOf('/') == fileId.length() - 1) 
{ @@ -303,20 +297,16 @@ public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs throw new NotFoundException(errorMessage); // (nobody should ever be using this API on a harvested DataFile)! } - - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - + + // This will throw a ForbiddenException if access isn't authorized: + checkAuthorization(df); + if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); } - - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); - + DownloadInfo dInfo = new DownloadInfo(df); logger.fine("checking if thumbnails are supported on this file."); @@ -532,11 +522,10 @@ public String tabularDatafileMetadataDDI(@PathParam("fileId") String fileId, @Q @Path("datafile/{fileId}/auxiliary") @GET public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, null, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, null, uriInfo, headers, response); } /* * GET method for retrieving a list auxiliary files associated with @@ -547,26 +536,21 @@ public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, @GET public Response listDatafileMetadataAuxByOrigin(@PathParam("fileId") String fileId, @PathParam("origin") String origin, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, 
origin, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, origin, uriInfo, headers, response); } - private Response listAuxiliaryFiles(String fileId, String origin, String apiToken, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) { + private Response listAuxiliaryFiles(String fileId, String origin, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - List auxFileList = auxiliaryFileService.findAuxiliaryFiles(df, origin); if (auxFileList == null || auxFileList.isEmpty()) { throw new NotFoundException("No Auxiliary files exist for datafile " + fileId + (origin==null ? "": " and the specified origin")); } - boolean isAccessAllowed = isAccessAuthorized(df, apiToken); + boolean isAccessAllowed = isAccessAuthorized(df); JsonArrayBuilder jab = Json.createArrayBuilder(); auxFileList.forEach(auxFile -> { if (isAccessAllowed || auxFile.getIsPublic()) { @@ -594,17 +578,12 @@ private Response listAuxiliaryFiles(String fileId, String origin, String apiToke public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId, @PathParam("formatTag") String formatTag, @PathParam("formatVersion") String formatVersion, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - DownloadInfo dInfo = new DownloadInfo(df); boolean publiclyAvailable = false; @@ -654,7 +633,7 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId // as defined for the DataFile itself), and will throw a ForbiddenException // if access is denied: if (!publiclyAvailable) { - 
checkAuthorization(df, apiToken); + checkAuthorization(df); } return downloadInstance; @@ -670,16 +649,16 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId @POST @Consumes("text/plain") @Produces({ "application/zip" }) - public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } @Path("dataset/{id}") @GET @Produces({"application/zip"}) - public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { try { User user = findUserOrDie(); DataverseRequest req = createDataverseRequest(user); @@ -693,7 +672,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist // We don't want downloads from Draft versions to be counted, // so we are setting the gbrecs (aka "do not write guestbook response") // variable accordingly: - return downloadDatafiles(fileIds, true, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, true, uriInfo, 
headers, response); } } @@ -714,7 +693,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist } String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -763,7 +742,7 @@ public Command handleLatestPublished() { if (dsv.isDraft()) { gbrecs = true; } - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -784,11 +763,11 @@ private static String getFileIdsAsCommaSeparated(List fileMetadata @Path("datafiles/{fileIds}") @GET @Produces({"application/zip"}) - public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } - private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, String apiTokenParam, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, UriInfo uriInfo, HttpHeaders 
headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { final long zipDownloadSizeLimit = systemConfig.getZipDownloadLimit(); logger.fine("setting zip download size limit to " + zipDownloadSizeLimit + " bytes."); @@ -810,11 +789,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon String customZipServiceUrl = settingsService.getValueForKey(SettingsServiceBean.Key.CustomZipDownloadServiceUrl); boolean useCustomZipService = customZipServiceUrl != null; - String apiToken = (apiTokenParam == null || apiTokenParam.equals("")) - ? headers.getHeaderString(API_KEY_HEADER) - : apiTokenParam; - - User apiTokenUser = findAPITokenUser(apiToken); //for use in adding gb records if necessary + User apiTokenUser = findAPITokenUser(); //for use in adding gb records if necessary Boolean getOrig = false; for (String key : uriInfo.getQueryParameters().keySet()) { @@ -827,7 +802,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon if (useCustomZipService) { URI redirect_uri = null; try { - redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiToken, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); + redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); } catch (WebApplicationException wae) { throw wae; } @@ -859,7 +834,7 @@ public void write(OutputStream os) throws IOException, logger.fine("token: " + fileIdParams[i]); Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); } catch (NumberFormatException nfe) { fileId = null; } @@ -867,7 +842,7 @@ public void write(OutputStream os) throws IOException, logger.fine("attempting to look up file id " + fileId); DataFile file = dataFileService.find(fileId); if (file != null) { - if 
(isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); //downloadInstance.addDataFile(file); @@ -1384,7 +1359,7 @@ public Response allowAccessRequest(@PathParam("id") String datasetToAllowAccessI return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.fileAccess.failure.noUser", args)); } - dataset.getEditVersion().getTermsOfUseAndAccess().setFileAccessRequest(allowRequest); + dataset.getOrCreateEditVersion().getTermsOfUseAndAccess().setFileAccessRequest(allowRequest); try { engineSvc.submit(new UpdateDatasetVersionCommand(dataset, dataverseRequest)); @@ -1436,8 +1411,8 @@ public Response requestFileAccess(@PathParam("id") String fileToRequestAccessId, List args = Arrays.asList(wr.getLocalizedMessage()); return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.fileAccess.failure.noUser", args)); } - - if (isAccessAuthorized(dataFile, getRequestApiKey())) { + //Already have access + if (isAccessAuthorized(dataFile)) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestAccess.failure.invalidRequest")); } @@ -1708,15 +1683,15 @@ public Response rejectFileAccess(@PathParam("id") String fileToRequestAccessId, // checkAuthorization is a convenience method; it calls the boolean method // isAccessAuthorized(), the actual workhorse, tand throws a 403 exception if not. 
- private void checkAuthorization(DataFile df, String apiToken) throws WebApplicationException { + private void checkAuthorization(DataFile df) throws WebApplicationException { - if (!isAccessAuthorized(df, apiToken)) { + if (!isAccessAuthorized(df)) { throw new ForbiddenException(); } } - private boolean isAccessAuthorized(DataFile df, String apiToken) { + private boolean isAccessAuthorized(DataFile df) { // First, check if the file belongs to a released Dataset version: boolean published = false; @@ -1787,37 +1762,50 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } } - if (!restricted && !embargoed) { - // And if they are not published, they can still be downloaded, if the user + + + //The one case where we don't need to check permissions + if (!restricted && !embargoed && published) { + // If they are not published, they can still be downloaded, if the user // has the permission to view unpublished versions! (this case will // be handled below) - if (published) { - return true; - } + return true; } + //For permissions check decide if we have a session user, or an API user User user = null; /** * Authentication/authorization: + */ + + User apiTokenUser = null; + + /* + * The logic looks for an apitoken authenticated user and uses it if it exists. + * If not, and a session user exists, we use that. If the apitoken method + * indicates a GuestUser, we will use that if there's no session. * - * note that the fragment below - that retrieves the session object - * and tries to find the user associated with the session - is really - * for logging/debugging purposes only; for practical purposes, it - * would be enough to just call "permissionService.on(df).has(Permission.DownloadFile)" - * and the method does just that, tries to authorize for the user in - * the current session (or guest user, if no session user is available): + * This is currently the only API call that supports sessions. 
If the rest of + * the API is opened up, the custom logic here wouldn't be needed. */ + + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + } - if (session != null) { + if ((apiTokenUser instanceof GuestUser) && session != null) { if (session.getUser() != null) { - if (session.getUser().isAuthenticated()) { - user = session.getUser(); - } else { + user = session.getUser(); + apiTokenUser=null; + //Fine logging + if (!session.getUser().isAuthenticated()) { logger.fine("User associated with the session is not an authenticated user."); if (session.getUser() instanceof PrivateUrlUser) { logger.fine("User associated with the session is a PrivateUrlUser user."); - user = session.getUser(); } if (session.getUser() instanceof GuestUser) { logger.fine("User associated with the session is indeed a guest user."); @@ -1829,154 +1817,45 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } else { logger.fine("Session is null."); } - - User apiTokenUser = null; - - if ((apiToken != null)&&(apiToken.length()!=64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - } - - if (apiTokenUser == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - } + //If we don't have a user, nothing more to do. 
(Note session could have returned GuestUser) + if (user == null && apiTokenUser == null) { + logger.warning("Unable to find a user via session or with a token."); + return false; } - - // OK, let's revisit the case of non-restricted files, this time in - // an unpublished version: - // (if (published) was already addressed above) - - if (!restricted && !embargoed) { - // If the file is not published, they can still download the file, if the user - // has the permission to view unpublished versions: - - if ( user != null ) { - // used in JSF context - if (permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - // it's not unthinkable, that a null user (i.e., guest user) could be given - // the ViewUnpublished permission! - logger.log(Level.FINE, "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", user.getIdentifier()); - return true; - } - } - if (apiTokenUser != null) { - // used in an API context - if (permissionService.requestOn( createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } + /* + * Since published and not restricted/embargoed is handled above, the main split + * now is whether it is published or not. If it's published, the only case left + * is with restricted/embargoed. With unpublished, both the restricted/embargoed + * and not restricted/embargoed both get handled the same way. + */ - // last option - guest user in either contexts - // Guset user is impled by the code above. 
- if ( permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset) ) { - return true; - } - + DataverseRequest dvr = null; + if (apiTokenUser != null) { + dvr = createDataverseRequest(apiTokenUser); } else { - - // OK, this is a restricted and/or embargoed file. - - boolean hasAccessToRestrictedBySession = false; - boolean hasAccessToRestrictedByToken = false; - - if (permissionService.on(df).has(Permission.DownloadFile)) { - // Note: PermissionServiceBean.on(Datafile df) will obtain the - // User from the Session object, just like in the code fragment - // above. That's why it's not passed along as an argument. - hasAccessToRestrictedBySession = true; - } else if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df).has(Permission.DownloadFile)) { - hasAccessToRestrictedByToken = true; - } - - if (hasAccessToRestrictedBySession || hasAccessToRestrictedByToken) { - if (published) { - if (hasAccessToRestrictedBySession) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, published datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, published datafile."); - } - } else { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, published datafile.", apiTokenUser.getIdentifier()); - } - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to view - // unpublished versions: - // Note that the code below does not allow a case where it is the - // session user that has the permission on the file, and the API token - // user with the ViewUnpublished permission, or vice versa! 
- if (hasAccessToRestrictedBySession) { - if (permissionService.on(df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, unpublished datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, unpublished datafile."); - } - return true; - } - } else { - if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } - } - } - } + // used in JSF context, user may be Guest + dvr = dvRequestService.getDataverseRequest(); + } + if (!published) { // and restricted or embargoed (implied by earlier processing) + // If the file is not published, they can still download the file, if the user + // has the permission to view unpublished versions: - - if ((apiToken != null)) { - // Will try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling user = findUserOrDie()..."); - user = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + // This line handles all three authenticated session user, token user, and guest cases. + if (permissionService.requestOn(dvr, df.getOwner()).has(Permission.ViewUnpublishedDataset)) { + // it's not unthinkable, that a GuestUser could be given + // the ViewUnpublished permission! 
+ logger.log(Level.FINE, + "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", + dvr.getUser().getIdentifier()); + return true; } - - if (user == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - return false; - } - - - //Doesn't this ~duplicate logic above - if so, if there's a way to get here, I think it still works for embargoed files (you only get access if you have download permissions, and, if not published, also view unpublished) - if (permissionService.requestOn(createDataverseRequest(user), df).has(Permission.DownloadFile)) { - if (published) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the datafile.", user.getIdentifier()); - //Same case as line 1809 (and part of 1708 though when published you don't need the DownloadFile permission) - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to view - // unpublished versions: - if (permissionService.requestOn(createDataverseRequest(user), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the (unpublished) datafile.", user.getIdentifier()); - //Same case as line 1843? - return true; - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the (unpublished) datafile.", user.getIdentifier()); - } - } - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the datafile.", user.getIdentifier()); + } else { // published and restricted and/or embargoed + // This line also handles all three authenticated session user, token user, and guest cases. 
+ if (permissionService.requestOn(dvr, df).has(Permission.DownloadFile)) { + return true; } - - return false; - } - + } if (user != null) { logger.log(Level.FINE, "Session-based auth: user {0} has NO access rights on the requested datafile.", user.getIdentifier()); } @@ -1984,37 +1863,35 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { if (apiTokenUser != null) { logger.log(Level.FINE, "Token-based auth: user {0} has NO access rights on the requested datafile.", apiTokenUser.getIdentifier()); } - - if (user == null && apiTokenUser == null) { - logger.fine("Unauthenticated access: No guest access to the datafile."); - } - return false; } - private User findAPITokenUser(String apiToken) { + private User findAPITokenUser() { User apiTokenUser = null; - - if ((apiToken != null) && (apiToken.length() != 64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - return apiTokenUser; - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - return null; + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + /* + * The idea here is to not let a guest user returned from findUserOrDie (which + * happens when there is no key/token, and which we want if there's no session) + * from overriding an authenticated session user. 
+ */ + if(apiTokenUser instanceof GuestUser) { + if(session!=null && session.getUser()!=null) { + //The apiTokenUser, if set, will override the sessionUser in permissions calcs, so set it to null if we have a session user + apiTokenUser=null; + } } - + return apiTokenUser; + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + return null; } - return apiTokenUser; } - private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { + private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { String zipServiceKey = null; Timestamp timestamp = null; @@ -2031,7 +1908,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, for (int i = 0; i < fileIdParams.length; i++) { Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); validIdCount++; } catch (NumberFormatException nfe) { fileId = null; @@ -2040,7 +1917,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, DataFile file = dataFileService.find(fileId); if (file != null) { validFileCount++; - if (isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); if (donotwriteGBResponse != true && file.isReleased()) { GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, apiTokenUser); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index ef08444af69..2c147b94243 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -14,6 +14,7 @@ import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DataverseSession; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.validation.EMailValidator; import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.GlobalId; @@ -34,6 +35,7 @@ import edu.harvard.iq.dataverse.authorization.providers.shib.ShibAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.shib.ShibServiceBean; import edu.harvard.iq.dataverse.authorization.providers.shib.ShibUtil; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailData; import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailException; @@ -47,6 +49,7 @@ import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; +import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; import javax.ws.rs.GET; import javax.ws.rs.POST; @@ -90,6 +93,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.DeleteRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteTemplateCommand; import edu.harvard.iq.dataverse.engine.command.impl.RegisterDvObjectCommand; +import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.userdata.UserListMaker; @@ -98,6 +102,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; import java.io.IOException; import java.io.OutputStream; @@ 
-1813,6 +1818,9 @@ public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, Dataset ds = findDatasetOrDie(dsid); DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if(dv==null) { + return error(Status.BAD_REQUEST, "Requested version not found."); + } if (dv.getArchivalCopyLocation() == null) { String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); // Note - the user is being sent via the createDataverseRequest(au) call to the @@ -1858,7 +1866,7 @@ public void run() { return error(Status.BAD_REQUEST, "Version was already submitted for archiving."); } } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); + return e1.getResponse(); } } @@ -1949,7 +1957,7 @@ public void run() { return error(Status.BAD_REQUEST, "No unarchived published dataset versions found"); } } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); + return e1.getResponse(); } } @@ -2241,4 +2249,73 @@ public Response getBannerMessages(@PathParam("id") Long id) throws WrappedRespon .collect(toJsonArray())); }
+
+    /**
+     * Creates a signed URL. Restricted to superusers.
+     * <p>
+     * The JSON body must supply "url" (the URL to sign). It may supply "user": when
+     * that identifies an existing user the URL is signed using their API token,
+     * otherwise using the token of the superuser making this call. The
+     * ExternalToolHandler.TIMEOUT (default 10) and ExternalToolHandler.HTTP_METHOD
+     * (default "GET") entries are optional.
+     *
+     * @param urlInfo JSON object describing the URL to sign
+     * @return the signed URL under ExternalToolHandler.SIGNED_URL; 403 for
+     *         non-superusers, 400 when "url" is missing, 409 when no usable API
+     *         token is available
+     */
+    @POST
+    @Consumes("application/json")
+    @Path("/requestSignedUrl")
+    public Response getSignedUrl(JsonObject urlInfo) {
+        AuthenticatedUser superuser = null;
+        try {
+            superuser = findAuthenticatedUserOrDie();
+        } catch (WrappedResponse wr) {
+            return wr.getResponse();
+        }
+        if (superuser == null || !superuser.isSuperuser()) {
+            return error(Response.Status.FORBIDDEN, "Requesting signed URLs is restricted to superusers.");
+        }
+
+        // JsonObject.getString(name) throws when the entry is missing, so use the
+        // defaulting overload and report a missing url as a client error.
+        String baseUrl = urlInfo.getString("url", null);
+        if (baseUrl == null) {
+            return error(Response.Status.BAD_REQUEST, "A url to sign is required.");
+        }
+
+        // If a user param was sent (and matches a user), we sign the URL for them,
+        // otherwise on behalf of the superuser who made this api call
+        String userId = urlInfo.getString("user", null);
+        AuthenticatedUser user = (userId == null) ? null : authSvc.getAuthenticatedUser(userId);
+        String key = null;
+        if (user != null) {
+            ApiToken apiToken = authSvc.findApiTokenByUser(user);
+            if (apiToken != null && !apiToken.isExpired() && !apiToken.isDisabled()) {
+                key = apiToken.getTokenString();
+            }
+        } else {
+            userId = superuser.getUserIdentifier();
+            // The superuser normally just used this token, but guard against a null
+            // lookup as for the user above. (ToDo - if we want this to work with
+            // workflow tokens (or as a signed URL), we should do more checking)
+            ApiToken superuserToken = authSvc.findApiTokenByUser(superuser);
+            if (superuserToken != null) {
+                key = superuserToken.getTokenString();
+            }
+        }
+        if (key == null) {
+            return error(Response.Status.CONFLICT, "Do not have a valid user with apiToken");
+        }
+        key = JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + key;
+
+        int timeout = urlInfo.getInt(ExternalToolHandler.TIMEOUT, 10);
+        String method = urlInfo.getString(ExternalToolHandler.HTTP_METHOD, "GET");
+
+        String signedUrl = UrlSignerUtil.signUrl(baseUrl, timeout, userId, method, key);
+
+        return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl));
+    }
+
} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index aff543e643c..0bb6eebb80b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -57,15 +57,17 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand; import edu.harvard.iq.dataverse.export.DDIExportServiceBean; import edu.harvard.iq.dataverse.export.ExportService; +import edu.harvard.iq.dataverse.externaltools.ExternalTool; +import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; - -import edu.harvard.iq.dataverse.S3PackageImporter; +import edu.harvard.iq.dataverse.api.AbstractApiBean.WrappedResponse; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import
edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; @@ -113,11 +115,13 @@ import java.time.LocalDateTime; import java.util.*; import java.util.concurrent.*; +import java.util.function.Predicate; import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.regex.Pattern; import java.util.stream.Collectors; import javax.ejb.EJB; @@ -142,7 +146,6 @@ import javax.ws.rs.core.*; import javax.ws.rs.core.Response.Status; import static javax.ws.rs.core.Response.Status.BAD_REQUEST; -import javax.ws.rs.core.UriInfo; import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrServerException; @@ -155,6 +158,7 @@ public class Datasets extends AbstractApiBean { private static final Logger logger = Logger.getLogger(Datasets.class.getCanonicalName()); + private static final Pattern dataFilePattern = Pattern.compile("^[0-9a-f]{11}-[0-9a-f]{12}\\.?.*"); @Inject DataverseSession session; @@ -630,7 +634,7 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, DatasetVersion managedVersion; if (updateDraft) { - final DatasetVersion editVersion = ds.getEditVersion(); + final DatasetVersion editVersion = ds.getOrCreateEditVersion(); editVersion.setDatasetFields(incomingVersion.getDatasetFields()); editVersion.setTermsOfUseAndAccess(incomingVersion.getTermsOfUseAndAccess()); editVersion.getTermsOfUseAndAccess().setDatasetVersion(editVersion); @@ -639,7 +643,7 @@ public Response updateDraftVersion( String jsonBody, 
@PathParam("id") String id, return error(Status.CONFLICT, BundleUtil.getStringFromBundle("dataset.message.toua.invalid")); } Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); - managedVersion = managedDataset.getEditVersion(); + managedVersion = managedDataset.getOrCreateEditVersion(); } else { boolean hasValidTerms = TermsOfUseAndAccessValidator.isTOUAValid(incomingVersion.getTermsOfUseAndAccess(), null); if (!hasValidTerms) { @@ -698,7 +702,7 @@ public Response updateVersionMetadata(String jsonLDBody, @PathParam("id") String try { Dataset ds = findDatasetOrDie(id); DataverseRequest req = createDataverseRequest(findUserOrDie()); - DatasetVersion dsv = ds.getEditVersion(); + DatasetVersion dsv = ds.getOrCreateEditVersion(); boolean updateDraft = ds.getLatestVersion().isDraft(); dsv = JSONLDUtil.updateDatasetVersionMDFromJsonLD(dsv, jsonLDBody, metadataBlockService, datasetFieldSvc, !replaceTerms, false, licenseSvc); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); @@ -709,7 +713,7 @@ public Response updateVersionMetadata(String jsonLDBody, @PathParam("id") String DatasetVersion managedVersion; if (updateDraft) { Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); - managedVersion = managedDataset.getEditVersion(); + managedVersion = managedDataset.getOrCreateEditVersion(); } else { managedVersion = execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); } @@ -731,14 +735,14 @@ public Response deleteMetadata(String jsonLDBody, @PathParam("id") String id) { try { Dataset ds = findDatasetOrDie(id); DataverseRequest req = createDataverseRequest(findUserOrDie()); - DatasetVersion dsv = ds.getEditVersion(); + DatasetVersion dsv = ds.getOrCreateEditVersion(); boolean updateDraft = ds.getLatestVersion().isDraft(); dsv = JSONLDUtil.deleteDatasetVersionMDFromJsonLD(dsv, jsonLDBody, metadataBlockService, licenseSvc); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); DatasetVersion managedVersion; if 
(updateDraft) { Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); - managedVersion = managedDataset.getEditVersion(); + managedVersion = managedDataset.getOrCreateEditVersion(); } else { managedVersion = execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); } @@ -769,7 +773,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav Dataset ds = findDatasetOrDie(id); JsonObject json = Json.createReader(rdr).readObject(); - DatasetVersion dsv = ds.getEditVersion(); + DatasetVersion dsv = ds.getOrCreateEditVersion(); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); List fields = new LinkedList<>(); DatasetField singleField = null; @@ -882,7 +886,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav boolean updateDraft = ds.getLatestVersion().isDraft(); DatasetVersion managedVersion = updateDraft - ? execCommand(new UpdateDatasetVersionCommand(ds, req)).getEditVersion() + ? execCommand(new UpdateDatasetVersionCommand(ds, req)).getOrCreateEditVersion() : execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); return ok(json(managedVersion)); @@ -932,7 +936,7 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque Dataset ds = findDatasetOrDie(id); JsonObject json = Json.createReader(rdr).readObject(); - DatasetVersion dsv = ds.getEditVersion(); + DatasetVersion dsv = ds.getOrCreateEditVersion(); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); List fields = new LinkedList<>(); DatasetField singleField = null; @@ -986,6 +990,7 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque dsf.setSingleValue(""); dsf.setSingleControlledVocabularyValue(null); } + cvvDisplay=""; } if (updateField.getDatasetFieldType().isControlledVocabulary()) { if (dsf.getDatasetFieldType().isAllowMultiples()) { @@ -1037,7 +1042,7 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque DatasetVersion 
managedVersion; if (updateDraft) { - managedVersion = execCommand(new UpdateDatasetVersionCommand(ds, req)).getEditVersion(); + managedVersion = execCommand(new UpdateDatasetVersionCommand(ds, req)).getOrCreateEditVersion(); } else { managedVersion = execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); } @@ -2451,8 +2456,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, fileService, permissionSvc, commandEngine, - systemConfig, - licenseSvc); + systemConfig); //------------------- @@ -2502,6 +2506,76 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } // end: addFileToDataset + /** + * Clean storage of a Dataset + * + * @param idSupplied + * @return + */ + @GET + @Path("{id}/cleanStorage") + public Response cleanStorage(@PathParam("id") String idSupplied, @QueryParam("dryrun") Boolean dryrun) { + // get user and dataset + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + Dataset dataset; + try { + dataset = findDatasetOrDie(idSupplied); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + // check permissions + if (!permissionSvc.permissionsFor(createDataverseRequest(authUser), dataset).contains(Permission.EditDataset)) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "Access denied!"); + } + + boolean doDryRun = dryrun != null && dryrun.booleanValue(); + + // check if no legacy files are present + Set datasetFilenames = getDatasetFilenames(dataset); + if (datasetFilenames.stream().anyMatch(x -> !dataFilePattern.matcher(x).matches())) { + logger.log(Level.WARNING, "Dataset contains legacy files not matching the naming pattern!"); + } + + Predicate filter = getToDeleteFilesFilter(datasetFilenames); + List deleted; + try { + StorageIO datasetIO = DataAccess.getStorageIO(dataset); + deleted = datasetIO.cleanUp(filter, doDryRun); + } catch 
(IOException ex) { + logger.log(Level.SEVERE, null, ex); + return error(Response.Status.INTERNAL_SERVER_ERROR, "IOException! Serious Error! See administrator!"); + } + + return ok("Found: " + datasetFilenames.stream().collect(Collectors.joining(", ")) + "\n" + "Deleted: " + deleted.stream().collect(Collectors.joining(", "))); + + } + + private static Set getDatasetFilenames(Dataset dataset) { + Set files = new HashSet<>(); + for (DataFile dataFile: dataset.getFiles()) { + String storageIdentifier = dataFile.getStorageIdentifier(); + String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); + String[] locationParts = location.split(":");//separate bucket, swift container, etc. from fileName + files.add(locationParts[locationParts.length-1]); + } + return files; + } + + public static Predicate getToDeleteFilesFilter(Set datasetFilenames) { + return f -> { + return dataFilePattern.matcher(f).matches() && datasetFilenames.stream().noneMatch(x -> f.startsWith(x)); + }; + } + private void msg(String m) { //System.out.println(m); logger.fine(m); @@ -3387,14 +3461,84 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, this.fileService, this.permissionSvc, this.commandEngine, - this.systemConfig, - this.licenseSvc + this.systemConfig ); return addFileHelper.addFiles(jsonData, dataset, authUser); } + /** + * Replace multiple Files to an existing Dataset + * + * @param idSupplied + * @param jsonData + * @return + */ + @POST + @Path("{id}/replaceFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response replaceFilesInDataset(@PathParam("id") String idSupplied, + @FormDataParam("jsonData") String jsonData) { + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = 
findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(idSupplied); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + dataset.getLocks().forEach(dl -> { + logger.info(dl.toString()); + }); + + //------------------------------------ + // (2a) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + DataverseRequest dvRequest = createDataverseRequest(authUser); + + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dvRequest, + this.ingestService, + this.datasetService, + this.fileService, + this.permissionSvc, + this.commandEngine, + this.systemConfig + ); + + return addFileHelper.replaceFiles(jsonData, dataset, authUser); + + } + /** * API to find curation assignments and statuses * @@ -3581,4 +3725,42 @@ private boolean isSingleVersionArchiving() { } return false; } + + // This method provides a callback for an external tool to retrieve its + // parameters/api URLs. If the request is authenticated, e.g. by it being + // signed, the api URLs will be signed. If a guest request is made, the URLs + // will be plain/unsigned. + // This supports the cases where a tool is accessing a restricted resource (e.g. + // for a draft dataset), or public case.
+ @GET + @Path("{id}/versions/{version}/toolparams/{tid}") + public Response getExternalToolDVParams(@PathParam("tid") long externalToolId, + @PathParam("id") String datasetId, @PathParam("version") String version, @QueryParam(value = "locale") String locale) { + try { + DataverseRequest req = createDataverseRequest(findUserOrDie()); + DatasetVersion target = getDatasetVersionOrDie(req, version, findDatasetOrDie(datasetId), null, null); + if (target == null) { + return error(BAD_REQUEST, "DatasetVersion not found."); + } + + ExternalTool externalTool = externalToolService.findById(externalToolId); + if(externalTool==null) { + return error(BAD_REQUEST, "External tool not found."); + } + if (!ExternalTool.Scope.DATASET.equals(externalTool.getScope())) { + return error(BAD_REQUEST, "External tool does not have dataset scope."); + } + ApiToken apiToken = null; + User u = findUserOrDie(); + if (u instanceof AuthenticatedUser) { + apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) u); + } + + + ExternalToolHandler eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ExternalTools.java b/src/main/java/edu/harvard/iq/dataverse/api/ExternalTools.java index aef30bfb0c2..e53b54482b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ExternalTools.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ExternalTools.java @@ -1,7 +1,6 @@ package edu.harvard.iq.dataverse.api; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; -import static edu.harvard.iq.dataverse.api.AbstractApiBean.error; import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; import java.util.logging.Logger; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 9dc0c3be524..965d56d355e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -14,6 +14,7 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.TermsOfUseAndAccessValidator; import edu.harvard.iq.dataverse.UserNotificationServiceBean; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; @@ -31,6 +32,8 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.export.ExportException; import edu.harvard.iq.dataverse.export.ExportService; +import edu.harvard.iq.dataverse.externaltools.ExternalTool; +import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.ingest.IngestRequest; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.ingest.IngestUtil; @@ -40,9 +43,10 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import java.io.InputStream; -import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -51,10 +55,8 @@ import javax.ejb.EJB; import javax.inject.Inject; import javax.json.Json; -import javax.json.JsonReader; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.Consumes; -import javax.ws.rs.DELETE; import javax.ws.rs.GET; import javax.ws.rs.POST; import 
javax.ws.rs.PUT; @@ -231,7 +233,6 @@ public Response replaceFileInDataset( if (null == contentDispositionHeader) { if (optionalFileParams.hasStorageIdentifier()) { newStorageIdentifier = optionalFileParams.getStorageIdentifier(); - // ToDo - check that storageIdentifier is valid if (optionalFileParams.hasFileName()) { newFilename = optionalFileParams.getFileName(); if (optionalFileParams.hasMimetype()) { @@ -257,39 +258,34 @@ public Response replaceFileInDataset( this.fileService, this.permissionSvc, this.commandEngine, - this.systemConfig, - this.licenseSvc); + this.systemConfig); // (5) Run "runReplaceFileByDatasetId" long fileToReplaceId = 0; try { DataFile dataFile = findDataFileOrDie(fileIdOrPersistentId); fileToReplaceId = dataFile.getId(); - - if (dataFile.isFilePackage()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile")); + + if (dataFile.isFilePackage()) { + return error(Response.Status.SERVICE_UNAVAILABLE, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile")); + } + + if (forceReplace) { + addFileHelper.runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, testFileInputStream, dataFile.getOwner(), optionalFileParams); + } else { + addFileHelper.runReplaceFile(fileToReplaceId, newFilename, newFileContentType, newStorageIdentifier, + testFileInputStream, dataFile.getOwner(), optionalFileParams); } } catch (WrappedResponse ex) { - String error = BundleUtil.getStringFromBundle("file.addreplace.error.existing_file_to_replace_not_found_by_id", Arrays.asList(fileIdOrPersistentId)); - // TODO: Some day, return ex.getResponse() instead. Also run FilesIT and updated expected status code and message. + String error = BundleUtil.getStringFromBundle( + "file.addreplace.error.existing_file_to_replace_not_found_by_id", + Arrays.asList(fileIdOrPersistentId)); + // TODO: Some day, return ex.getResponse() instead. 
Also run FilesIT and updated + // expected status code and message. return error(BAD_REQUEST, error); } - if (forceReplace){ - addFileHelper.runForceReplaceFile(fileToReplaceId, - newFilename, - newFileContentType, - newStorageIdentifier, - testFileInputStream, - optionalFileParams); - }else{ - addFileHelper.runReplaceFile(fileToReplaceId, - newFilename, - newFileContentType, - newStorageIdentifier, - testFileInputStream, - optionalFileParams); - } - msg("we're back....."); if (addFileHelper.hasError()){ msg("yes, has error"); @@ -388,7 +384,7 @@ public Response updateFileMetadata(@FormDataParam("jsonData") String jsonData, } try { - DatasetVersion editVersion = df.getOwner().getEditVersion(); + DatasetVersion editVersion = df.getOwner().getOrCreateEditVersion(); //We get the new fileMetadata from the new version //This is because after generating the draft with getEditVersion, @@ -407,8 +403,7 @@ public Response updateFileMetadata(@FormDataParam("jsonData") String jsonData, return error(Response.Status.BAD_REQUEST, "An error has occurred attempting to update the requested DataFile. 
It is not part of the current version of the Dataset."); } - JsonReader jsonReader = Json.createReader(new StringReader(jsonData)); - javax.json.JsonObject jsonObject = jsonReader.readObject(); + javax.json.JsonObject jsonObject = JsonUtil.getJsonObject(jsonData); String incomingLabel = jsonObject.getString("label", null); String incomingDirectoryLabel = jsonObject.getString("directoryLabel", null); String existingLabel = df.getFileMetadata().getLabel(); @@ -448,10 +443,81 @@ public Response updateFileMetadata(@FormDataParam("jsonData") String jsonData, .build(); } + @GET + @Path("{id}/draft") + public Response getFileDataDraft(@PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { + return getFileDataResponse(fileIdOrPersistentId, uriInfo, headers, response, true); + } + + @GET + @Path("{id}") + public Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { + return getFileDataResponse(fileIdOrPersistentId, uriInfo, headers, response, false); + } + + private Response getFileDataResponse(String fileIdOrPersistentId, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response, boolean draft ){ + + DataverseRequest req; + try { + req = createDataverseRequest(findUserOrDie()); + } catch (Exception e) { + return error(BAD_REQUEST, "Error attempting to request information. 
Maybe a bad API token?"); + } + final DataFile df; + try { + df = execCommand(new GetDataFileCommand(req, findDataFileOrDie(fileIdOrPersistentId))); + } catch (Exception e) { + return error(BAD_REQUEST, "Error attempting get the requested data file."); + } + + FileMetadata fm; + + if (draft) { + try { + fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); + } catch (WrappedResponse w) { + return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); + } + if (null == fm) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); + } + } else { + //first get latest published + //if not available get draft if permissible + + try { + fm = df.getLatestPublishedFileMetadata(); + + } catch (UnsupportedOperationException e) { + try { + fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); + } catch (WrappedResponse w) { + return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); + } + if (null == fm) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); + } + } + + } + + if (fm.getDatasetVersion().isReleased()) { + MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountLoggingServiceBean.MakeDataCountEntry(uriInfo, headers, dvRequestService, df); + mdcLogService.logEntry(entry); + } + + return Response.ok(Json.createObjectBuilder() + .add("status", STATUS_OK) + .add("data", json(fm)).build()) + .type(MediaType.APPLICATION_JSON) + .build(); + } + @GET @Path("{id}/metadata") public Response getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, Boolean getDraft) throws WrappedResponse, Exception { - DataverseRequest req; + //ToDo - versionId is not used - 
can't get metadata for earlier versions + DataverseRequest req; try { req = createDataverseRequest(findUserOrDie()); } catch (Exception e) { @@ -472,7 +538,7 @@ public Response getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @P return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset." ); } if(null == fm) { - return error(BAD_REQUEST, "No draft availabile for this dataset"); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); } } else { fm = df.getLatestPublishedFileMetadata(); @@ -488,6 +554,7 @@ public Response getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @P .type(MediaType.TEXT_PLAIN) //Our plain text string is already json .build(); } + @GET @Path("{id}/metadata/draft") public Response getFileMetadataDraft(@PathParam("id") String fileIdOrPersistentId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, Boolean getDraft) throws WrappedResponse, Exception { @@ -620,6 +687,27 @@ public Response redetectDatafile(@PathParam("id") String id, @QueryParam("dryRun } } + @Path("{id}/extractNcml") + @POST + public Response extractNcml(@PathParam("id") String id) { + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + // We can always make a command in the future if there's a need + // for non-superusers to call this API. 
+ return error(Response.Status.FORBIDDEN, "This API call can be used by superusers only"); + } + DataFile dataFileIn = findDataFileOrDie(id); + java.nio.file.Path tempLocationPath = null; + boolean successOrFail = ingestService.extractMetadataNcml(dataFileIn, tempLocationPath); + NullSafeJsonBuilder result = NullSafeJsonBuilder.jsonObjectBuilder() + .add("result", successOrFail); + return ok(result); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + /** * Attempting to run metadata export, for all the formats for which we have * metadata Exporters. @@ -639,4 +727,40 @@ private void exportDatasetMetadata(SettingsServiceBean settingsServiceBean, Data } } + // This method provides a callback for an external tool to retrieve its + // parameters/api URLs. If the request is authenticated, e.g. by it being + // signed, the api URLs will be signed. If a guest request is made, the URLs + // will be plain/unsigned. + // This supports the cases where a tool is accessing a restricted resource (e.g. + // preview of a draft file), or public case.
+ @GET + @Path("{id}/metadata/{fmid}/toolparams/{tid}") + public Response getExternalToolFMParams(@PathParam("tid") long externalToolId, + @PathParam("id") String fileId, @PathParam("fmid") long fmid, @QueryParam(value = "locale") String locale) { + try { + ExternalTool externalTool = externalToolService.findById(externalToolId); + if(externalTool == null) { + return error(BAD_REQUEST, "External tool not found."); + } + if (!ExternalTool.Scope.FILE.equals(externalTool.getScope())) { + return error(BAD_REQUEST, "External tool does not have file scope."); + } + ApiToken apiToken = null; + User u = findUserOrDie(); + if (u instanceof AuthenticatedUser) { + apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) u); + } + FileMetadata target = fileSvc.findFileMetadata(fmid); + if (target == null) { + return error(BAD_REQUEST, "FileMetadata not found."); + } + + ExternalToolHandler eth = null; + + eth = new ExternalToolHandler(externalTool, target.getDataFile(), apiToken, target, locale); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java index d17e76c499a..9aea3adab8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java @@ -5,13 +5,17 @@ import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetHarvestingClientCommand; import 
edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import javax.json.JsonObjectBuilder; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import java.io.IOException; @@ -20,10 +24,10 @@ import java.util.List; import java.util.logging.Logger; import javax.ejb.EJB; -import javax.ejb.Stateless; import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObject; +import javax.ws.rs.DELETE; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.PUT; @@ -32,13 +36,10 @@ import javax.ws.rs.QueryParam; import javax.ws.rs.core.Response; -@Stateless @Path("harvest/clients") public class HarvestingClients extends AbstractApiBean { - @EJB - DataverseServiceBean dataverseService; @EJB HarvesterServiceBean harvesterService; @EJB @@ -88,7 +89,7 @@ public Response harvestingClients(@QueryParam("key") String apiKey) throws IOExc } if (retrievedHarvestingClient != null) { - hcArr.add(harvestingConfigAsJson(retrievedHarvestingClient)); + hcArr.add(JsonPrinter.json(retrievedHarvestingClient)); } } @@ -111,6 +112,10 @@ public Response harvestingClient(@PathParam("nickName") String nickName, @QueryP return error(Response.Status.NOT_FOUND, "Harvesting client " + nickName + " not found."); } + // See the comment in the harvestingClients() (plural) above for the explanation + // of why we are looking up the client twice (tl;dr: to utilize the + // authorization logic in the command) + HarvestingClient retrievedHarvestingClient = null; try { @@ -118,7 +123,7 @@ public Response harvestingClient(@PathParam("nickName") String nickName, 
@QueryP // exception, that already has a proper HTTP response in it. retrievedHarvestingClient = execCommand(new GetHarvestingClientCommand(createDataverseRequest(findUserOrDie()), harvestingClient)); - logger.info("retrieved Harvesting Client " + retrievedHarvestingClient.getName() + " with the GetHarvestingClient command."); + logger.fine("retrieved Harvesting Client " + retrievedHarvestingClient.getName() + " with the GetHarvestingClient command."); } catch (WrappedResponse wr) { return wr.getResponse(); } catch (Exception ex) { @@ -132,7 +137,7 @@ public Response harvestingClient(@PathParam("nickName") String nickName, @QueryP } try { - return ok(harvestingConfigAsJson(retrievedHarvestingClient)); + return ok(JsonPrinter.json(retrievedHarvestingClient)); } catch (Exception ex) { logger.warning("Unknown exception caught while trying to format harvesting client config as json: "+ex.getMessage()); return error( Response.Status.BAD_REQUEST, @@ -143,29 +148,76 @@ public Response harvestingClient(@PathParam("nickName") String nickName, @QueryP @POST @Path("{nickName}") public Response createHarvestingClient(String jsonBody, @PathParam("nickName") String nickName, @QueryParam("key") String apiKey) throws IOException, JsonParseException { - + // Per the discussion during the QA of PR #9174, we decided to make + // the create/edit APIs superuser-only (the delete API was already so) + try { + User u = findUserOrDie(); + if ((!(u instanceof AuthenticatedUser) || !u.isSuperuser())) { + throw new WrappedResponse(error(Response.Status.UNAUTHORIZED, "Only superusers can create harvesting clients.")); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + try ( StringReader rdr = new StringReader(jsonBody) ) { JsonObject json = Json.createReader(rdr).readObject(); + // Check that the client with this name doesn't exist yet: + // (we could simply let the command fail, but that does not result + // in a pretty report to the end user) + + HarvestingClient 
lookedUpClient = null; + try { + lookedUpClient = harvestingClientService.findByNickname(nickName); + } catch (Exception ex) { + logger.warning("Exception caught looking up harvesting client " + nickName + ": " + ex.getMessage()); + // let's hope that this was a fluke of some kind; we'll proceed + // with the attempt to create a new client and report an error + // if that fails too. + } + + if (lookedUpClient != null) { + return error(Response.Status.BAD_REQUEST, "Harvesting client " + nickName + " already exists"); + } + HarvestingClient harvestingClient = new HarvestingClient(); - // TODO: check that it doesn't exist yet... - harvestingClient.setName(nickName); + String dataverseAlias = jsonParser().parseHarvestingClient(json, harvestingClient); - Dataverse ownerDataverse = dataverseService.findByAlias(dataverseAlias); + if (dataverseAlias == null) { + return error(Response.Status.BAD_REQUEST, "dataverseAlias must be supplied"); + } + + // Check if the dataverseAlias supplied is valid, i.e. 
corresponds + // to an existing dataverse (collection): + Dataverse ownerDataverse = dataverseSvc.findByAlias(dataverseAlias); if (ownerDataverse == null) { return error(Response.Status.BAD_REQUEST, "No such dataverse: " + dataverseAlias); } + // The nickname supplied as part of the Rest path takes precedence: + harvestingClient.setName(nickName); + + // Populate the description field, if none is supplied: + if (harvestingClient.getArchiveDescription() == null) { + harvestingClient.setArchiveDescription(BundleUtil.getStringFromBundle("harvestclients.viewEditDialog.archiveDescription.default.generic")); + } + + if (StringUtil.isEmpty(harvestingClient.getArchiveUrl()) + || StringUtil.isEmpty(harvestingClient.getHarvestingUrl()) + || StringUtil.isEmpty(harvestingClient.getMetadataPrefix())) { + return error(Response.Status.BAD_REQUEST, "Required fields harvestUrl, archiveUrl and metadataFormat must be supplied"); + } + harvestingClient.setDataverse(ownerDataverse); if (ownerDataverse.getHarvestingClientConfigs() == null) { ownerDataverse.setHarvestingClientConfigs(new ArrayList<>()); } ownerDataverse.getHarvestingClientConfigs().add(harvestingClient); - + DataverseRequest req = createDataverseRequest(findUserOrDie()); - HarvestingClient managedHarvestingClient = execCommand( new CreateHarvestingClientCommand(req, harvestingClient)); - return created( "/harvest/clients/" + nickName, harvestingConfigAsJson(managedHarvestingClient)); + harvestingClient = execCommand(new CreateHarvestingClientCommand(req, harvestingClient)); + return created( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient)); } catch (JsonParseException ex) { return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); @@ -180,6 +232,15 @@ public Response createHarvestingClient(String jsonBody, @PathParam("nickName") S @PUT @Path("{nickName}") public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") String nickName, 
@QueryParam("key") String apiKey) throws IOException, JsonParseException { + try { + User u = findUserOrDie(); + if ((!(u instanceof AuthenticatedUser) || !u.isSuperuser())) { + throw new WrappedResponse(error(Response.Status.UNAUTHORIZED, "Only superusers can modify harvesting clients.")); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + HarvestingClient harvestingClient = null; try { harvestingClient = harvestingClientService.findByNickname(nickName); @@ -198,15 +259,44 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S DataverseRequest req = createDataverseRequest(findUserOrDie()); JsonObject json = Json.createReader(rdr).readObject(); - String newDataverseAlias = jsonParser().parseHarvestingClient(json, harvestingClient); + HarvestingClient newHarvestingClient = new HarvestingClient(); + String newDataverseAlias = jsonParser().parseHarvestingClient(json, newHarvestingClient); if (newDataverseAlias != null && !newDataverseAlias.equals("") && !newDataverseAlias.equals(ownerDataverseAlias)) { return error(Response.Status.BAD_REQUEST, "Bad \"dataverseAlias\" supplied. Harvesting client "+nickName+" belongs to the dataverse "+ownerDataverseAlias); } - HarvestingClient managedHarvestingClient = execCommand( new UpdateHarvestingClientCommand(req, harvestingClient)); - return created( "/datasets/" + nickName, harvestingConfigAsJson(managedHarvestingClient)); + + // Go through the supported editable fields and update the client accordingly: + // TODO: We may want to reevaluate whether we really want/need *all* + // of these fields to be editable. 
+ + if (newHarvestingClient.getHarvestingUrl() != null) { + harvestingClient.setHarvestingUrl(newHarvestingClient.getHarvestingUrl()); + } + if (newHarvestingClient.getHarvestingSet() != null) { + harvestingClient.setHarvestingSet(newHarvestingClient.getHarvestingSet()); + } + if (newHarvestingClient.getMetadataPrefix() != null) { + harvestingClient.setMetadataPrefix(newHarvestingClient.getMetadataPrefix()); + } + if (newHarvestingClient.getArchiveUrl() != null) { + harvestingClient.setArchiveUrl(newHarvestingClient.getArchiveUrl()); + } + if (newHarvestingClient.getArchiveDescription() != null) { + harvestingClient.setArchiveDescription(newHarvestingClient.getArchiveDescription()); + } + if (newHarvestingClient.getHarvestStyle() != null) { + harvestingClient.setHarvestStyle(newHarvestingClient.getHarvestStyle()); + } + if (newHarvestingClient.getCustomHttpHeaders() != null) { + harvestingClient.setCustomHttpHeaders(newHarvestingClient.getCustomHttpHeaders()); + } + // TODO: Make schedule configurable via this API too. + + harvestingClient = execCommand( new UpdateHarvestingClientCommand(req, harvestingClient)); + return ok( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient)); // harvestingConfigAsJson(harvestingClient)); } catch (JsonParseException ex) { return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); @@ -218,9 +308,58 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S } - // TODO: - // add a @DELETE method - // (there is already a DeleteHarvestingClient command) + @DELETE + @Path("{nickName}") + public Response deleteHarvestingClient(@PathParam("nickName") String nickName) throws IOException { + // Deleting a client can take a while (if there's a large amount of + // harvested content associated with it). So instead of calling the command + // directly, we will be calling an async. service bean method. 
+ + + try { + User u = findUserOrDie(); + if ((!(u instanceof AuthenticatedUser) || !u.isSuperuser())) { + throw new WrappedResponse(error(Response.Status.UNAUTHORIZED, "Only superusers can delete harvesting clients.")); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + HarvestingClient harvestingClient = null; + + try { + harvestingClient = harvestingClientService.findByNickname(nickName); + } catch (Exception ex) { + logger.warning("Exception caught looking up harvesting client " + nickName + ": " + ex.getMessage()); + return error( Response.Status.BAD_REQUEST, "Internal error: failed to look up harvesting client " + nickName); + } + + if (harvestingClient == null) { + return error(Response.Status.NOT_FOUND, "Harvesting client " + nickName + " not found."); + } + + // Check if the client is in a state where it can be safely deleted: + + if (harvestingClient.isDeleteInProgress()) { + return error( Response.Status.BAD_REQUEST, "Harvesting client " + nickName + " is already being deleted (in progress)"); + } + + if (harvestingClient.isHarvestingNow()) { + return error( Response.Status.BAD_REQUEST, "It is not safe to delete client " + nickName + " while a harvesting job is in progress"); + } + + // Finally, delete it (asynchronously): + + try { + harvestingClientService.deleteClient(harvestingClient.getId()); + } catch (Exception ex) { + logger.warning("Exception caught deleting harvesting client " + nickName + ": " + ex.getMessage()); + return error( Response.Status.BAD_REQUEST, "Internal error: failed to delete harvesting client " + nickName); + } + + return ok("Harvesting Client " + nickName + ": delete in progress"); + } + // Methods for managing harvesting runs (jobs): @@ -240,13 +379,13 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname, } if (authenticatedUser == null || !authenticatedUser.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "Only the Dataverse Admin user can run harvesting jobs"); + 
return error(Response.Status.FORBIDDEN, "Only admin users can run harvesting jobs"); } 
HarvestingClient harvestingClient = harvestingClientService.findByNickname(clientNickname); if (harvestingClient == null) { - return error(Response.Status.NOT_FOUND, "No such dataverse: "+clientNickname); + return error(Response.Status.NOT_FOUND, "No such client: "+clientNickname); } DataverseRequest dataverseRequest = createDataverseRequest(authenticatedUser); @@ -257,58 +396,4 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname, } return this.accepted(); } - - // This GET shows the status of the harvesting run in progress for this - // client, if present: - // @GET - // @Path("{nickName}/run") - // TODO: - - // This DELETE kills the harvesting run in progress for this client, - // if present: - // @DELETE - // @Path("{nickName}/run") - // TODO: - - - - - - /* Auxiliary, helper methods: */ - - /* - @Deprecated - public static JsonArrayBuilder harvestingConfigsAsJsonArray(List harvestingDataverses) { - JsonArrayBuilder hdArr = Json.createArrayBuilder(); - - for (Dataverse hd : harvestingDataverses) { - hdArr.add(harvestingConfigAsJson(hd.getHarvestingClientConfig())); - } - return hdArr; - }*/ - - public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) { - if (harvestingConfig == null) { - return null; - } - - - return jsonObjectBuilder().add("nickName", harvestingConfig.getName()). - add("dataverseAlias", harvestingConfig.getDataverse().getAlias()). - add("type", harvestingConfig.getHarvestType()). - add("harvestUrl", harvestingConfig.getHarvestingUrl()). - add("archiveUrl", harvestingConfig.getArchiveUrl()). - add("archiveDescription",harvestingConfig.getArchiveDescription()). - add("metadataFormat", harvestingConfig.getMetadataPrefix()). - add("set", harvestingConfig.getHarvestingSet() == null ? "N/A" : harvestingConfig.getHarvestingSet()). - add("schedule", harvestingConfig.isScheduled() ? harvestingConfig.getScheduleDescription() : "none"). - add("status", harvestingConfig.isHarvestingNow() ? 
"inProgress" : "inActive"). - add("lastHarvest", harvestingConfig.getLastHarvestTime() == null ? "N/A" : harvestingConfig.getLastHarvestTime().toString()). - add("lastResult", harvestingConfig.getLastResult()). - add("lastSuccessful", harvestingConfig.getLastSuccessfulHarvestTime() == null ? "N/A" : harvestingConfig.getLastSuccessfulHarvestTime().toString()). - add("lastNonEmpty", harvestingConfig.getLastNonEmptyHarvestTime() == null ? "N/A" : harvestingConfig.getLastNonEmptyHarvestTime().toString()). - add("lastDatasetsHarvested", harvestingConfig.getLastHarvestedDatasetCount() == null ? "N/A" : harvestingConfig.getLastHarvestedDatasetCount().toString()). - add("lastDatasetsDeleted", harvestingConfig.getLastDeletedDatasetCount() == null ? "N/A" : harvestingConfig.getLastDeletedDatasetCount().toString()). - add("lastDatasetsFailed", harvestingConfig.getLastFailedDatasetCount() == null ? "N/A" : harvestingConfig.getLastFailedDatasetCount().toString()); - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Info.java b/src/main/java/edu/harvard/iq/dataverse/api/Info.java index 4fe5cba5b9f..fd7824c15cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Info.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Info.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; import javax.ejb.EJB; @@ -44,7 +45,7 @@ public Response getInfo() { @GET @Path("server") public Response getServer() { - return response( req -> ok(systemConfig.getDataverseServer())); + return response( req -> ok(JvmSettings.FQDN.lookup())); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Licenses.java b/src/main/java/edu/harvard/iq/dataverse/api/Licenses.java index 58e1f8cc2c5..1fdf7818cfb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Licenses.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/api/Licenses.java @@ -146,6 +146,37 @@ public Response setActiveState(@PathParam("id") long id, @PathParam("activeState } } + @PUT + @Path("/{id}/:sortOrder/{sortOrder}") + public Response setSortOrder(@PathParam("id") long id, @PathParam("sortOrder") long sortOrder) { + User authenticatedUser; + try { + authenticatedUser = findAuthenticatedUserOrDie(); + if (!authenticatedUser.isSuperuser()) { + return error(Status.FORBIDDEN, "must be superuser"); + } + } catch (WrappedResponse e) { + return error(Status.UNAUTHORIZED, "api key required"); + } + try { + if (licenseSvc.setSortOrder(id, sortOrder) == 0) { + return error(Response.Status.NOT_FOUND, "License with ID " + id + " not found"); + } + License license = licenseSvc.getById(id); + actionLogSvc + .log(new ActionLogRecord(ActionLogRecord.ActionType.Admin, "sortOrderLicenseChanged") + .setInfo("License " + license.getName() + "(" + license.getUri() + ") as id: " + id + + " has now sort order " + sortOrder + ".") + .setUserIdentifier(authenticatedUser.getIdentifier())); + return ok("License ID " + id + " sort order set to " + sortOrder); + } catch (WrappedResponse e) { + if (e.getCause() instanceof IllegalArgumentException) { + return badRequest(e.getCause().getMessage()); + } + return error(Response.Status.INTERNAL_SERVER_ERROR, e.getMessage()); + } + } + @DELETE @Path("/{id}") public Response deleteLicenseById(@PathParam("id") long id) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Search.java b/src/main/java/edu/harvard/iq/dataverse/api/Search.java index 71cb59ff62a..cef509b1ec5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Search.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Search.java @@ -72,6 +72,8 @@ public Response search( @QueryParam("show_my_data") boolean showMyData, @QueryParam("query_entities") boolean queryEntities, @QueryParam("metadata_fields") List metadataFields, + @QueryParam("geo_point") String geoPointRequested, + 
@QueryParam("geo_radius") String geoRadiusRequested, @Context HttpServletResponse response ) { @@ -87,6 +89,8 @@ public Response search( // sanity checking on user-supplied arguments SortBy sortBy; int numResultsPerPage; + String geoPoint; + String geoRadius; List dataverseSubtrees = new ArrayList<>(); try { @@ -119,6 +123,17 @@ public Response search( throw new IOException("Filter is empty, which should never happen, as this allows unfettered searching of our index"); } + geoPoint = getGeoPoint(geoPointRequested); + geoRadius = getGeoRadius(geoRadiusRequested); + + if (geoPoint != null && geoRadius == null) { + return error(Response.Status.BAD_REQUEST, "If you supply geo_point you must also supply geo_radius."); + } + + if (geoRadius != null && geoPoint == null) { + return error(Response.Status.BAD_REQUEST, "If you supply geo_radius you must also supply geo_point."); + } + } catch (Exception ex) { return error(Response.Status.BAD_REQUEST, ex.getLocalizedMessage()); } @@ -137,7 +152,9 @@ public Response search( paginationStart, dataRelatedToMe, numResultsPerPage, - true //SEK get query entities always for search API additional Dataset Information 6300 12/6/2019 + true, //SEK get query entities always for search API additional Dataset Information 6300 12/6/2019 + geoPoint, + geoRadius ); } catch (SearchException ex) { Throwable cause = ex; @@ -340,4 +357,12 @@ private Dataverse getSubtree(String alias) throws Exception { } } + private String getGeoPoint(String geoPointRequested) { + return SearchUtil.getGeoPoint(geoPointRequested); + } + + private String getGeoRadius(String geoRadiusRequested) { + return SearchUtil.getGeoRadius(geoRadiusRequested); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java index b532fbd4154..42caa95b9f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java @@ -63,7 +63,9 @@ public 
Response getExternalToolsForFile(@PathParam("id") String idSupplied, @Que ApiToken apiToken = externalToolService.getApiToken(getRequestApiKey()); ExternalToolHandler externalToolHandler = new ExternalToolHandler(tool, dataFile, apiToken, dataFile.getFileMetadata(), null); JsonObjectBuilder toolToJson = externalToolService.getToolAsJsonWithQueryParameters(externalToolHandler); - tools.add(toolToJson); + if (externalToolService.meetsRequirements(tool, dataFile)) { + tools.add(toolToJson); + } } return ok(tools); } catch (WrappedResponse wr) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Users.java b/src/main/java/edu/harvard/iq/dataverse/api/Users.java index d3b938af960..7568c7caff6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Users.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Users.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.logging.Level; import java.util.logging.Logger; import javax.ejb.Stateless; import javax.json.JsonArray; @@ -200,12 +201,17 @@ public Response getAuthenticatedUserByToken() { String tokenFromRequestAPI = getRequestApiKey(); AuthenticatedUser authenticatedUser = findUserByApiToken(tokenFromRequestAPI); + // This allows use of the :me API call from an active login session. 
Not sure + // this is a good idea if (authenticatedUser == null) { - return error(Response.Status.BAD_REQUEST, "User with token " + tokenFromRequestAPI + " not found."); - } else { - return ok(json(authenticatedUser)); + try { + authenticatedUser = findAuthenticatedUserOrDie(); + } catch (WrappedResponse ex) { + Logger.getLogger(Users.class.getName()).log(Level.SEVERE, null, ex); + return error(Response.Status.BAD_REQUEST, "User with token " + tokenFromRequestAPI + " not found."); + } } - + return ok(json(authenticatedUser)); } @POST diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java index b6d75276ae1..6543d771ebe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java @@ -110,7 +110,7 @@ public DepositReceipt createNew(String collectionUri, Deposit deposit, AuthCrede throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "user " + user.getDisplayInfo().getTitle() + " is not authorized to create a dataset in this dataverse."); } - DatasetVersion newDatasetVersion = dataset.getEditVersion(); + DatasetVersion newDatasetVersion = dataset.getOrCreateEditVersion(); String foreignFormat = SwordUtil.DCTERMS; try { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/ContainerManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/ContainerManagerImpl.java index dc178a9a740..8fb55a8eaf6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/ContainerManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/ContainerManagerImpl.java @@ -137,7 +137,7 @@ public DepositReceipt replaceMetadata(String uri, Deposit deposit, AuthCredentia if (!permissionService.isUserAllowedOn(user, updateDatasetCommand, dataset)) { throw new 
SwordError(UriRegistry.ERROR_BAD_REQUEST, "User " + user.getDisplayInfo().getTitle() + " is not authorized to modify dataverse " + dvThatOwnsDataset.getAlias()); } - DatasetVersion datasetVersion = dataset.getEditVersion(); + DatasetVersion datasetVersion = dataset.getOrCreateEditVersion(); // erase all metadata before creating populating dataset version List emptyDatasetFields = new ArrayList<>(); datasetVersion.setDatasetFields(emptyDatasetFields); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java index 928ffd4a129..5491024c73c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java @@ -250,7 +250,7 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au // Make sure that the upload type is not rsync - handled above for dual mode // ------------------------------------- - if (dataset.getEditVersion().isHasPackageFile()) { + if (dataset.getOrCreateEditVersion().isHasPackageFile()) { throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile")); } @@ -276,7 +276,7 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au } String uploadedZipFilename = deposit.getFilename(); - DatasetVersion editVersion = dataset.getEditVersion(); + DatasetVersion editVersion = dataset.getOrCreateEditVersion(); if (deposit.getInputStream() == null) { throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Deposit input stream was null."); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java index ce5f9415fcc..1e506c6a0b1 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api.datadeposit; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; import java.util.Arrays; @@ -86,37 +87,32 @@ public boolean storeAndCheckBinary() { @Override public String getTempDirectory() { - String tmpFileDir = System.getProperty(SystemConfig.FILES_DIRECTORY); - if (tmpFileDir != null) { - String swordDirString = tmpFileDir + File.separator + "sword"; - File swordDirFile = new File(swordDirString); - /** - * @todo Do we really need this check? It seems like we do because - * if you create a dataset via the native API and then later try to - * upload a file via SWORD, the directory defined by - * dataverse.files.directory may not exist and we get errors deep in - * the SWORD library code. Could maybe use a try catch in the doPost - * method of our SWORDv2MediaResourceServlet. - */ - if (swordDirFile.exists()) { + // will throw a runtime exception when not found + String tmpFileDir = JvmSettings.FILES_DIRECTORY.lookup(); + + String swordDirString = tmpFileDir + File.separator + "sword"; + File swordDirFile = new File(swordDirString); + /** + * @todo Do we really need this check? It seems like we do because + * if you create a dataset via the native API and then later try to + * upload a file via SWORD, the directory defined by + * dataverse.files.directory may not exist and we get errors deep in + * the SWORD library code. Could maybe use a try catch in the doPost + * method of our SWORDv2MediaResourceServlet. 
+ */ + if (swordDirFile.exists()) { + return swordDirString; + } else { + boolean mkdirSuccess = swordDirFile.mkdirs(); + if (mkdirSuccess) { + logger.info("Created directory " + swordDirString); return swordDirString; } else { - boolean mkdirSuccess = swordDirFile.mkdirs(); - if (mkdirSuccess) { - logger.info("Created directory " + swordDirString); - return swordDirString; - } else { - String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); - logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); - } + String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); + logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); + // sadly, must throw RunTimeException to communicate with SWORD user + throw new RuntimeException(msgForSwordUsers); } - } else { - String msgForSwordUsers = ("JVM option \"" + SystemConfig.FILES_DIRECTORY + "\" not defined. 
Check logs for details."); - logger.severe(msgForSwordUsers); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index a4e78b33a3c..d9433832309 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -1352,7 +1352,9 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th } else if (xmlr.getLocalName().equals("prodDate")) { citation.getFields().add(FieldDTO.createPrimitiveFieldDTO("productionDate", parseDate(xmlr, "prodDate"))); } else if (xmlr.getLocalName().equals("prodPlac")) { - citation.getFields().add(FieldDTO.createPrimitiveFieldDTO("productionPlace", parseDate(xmlr, "prodPlac"))); + List prodPlac = new ArrayList<>(); + prodPlac.add(parseText(xmlr, "prodPlac")); + citation.getFields().add(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.productionPlace, prodPlac)); } else if (xmlr.getLocalName().equals("software")) { HashSet set = new HashSet<>(); addToSet(set,"softwareVersion", xmlr.getAttributeValue(null, "version")); diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java index b242cd2936f..9bf53116efa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java @@ -647,6 +647,8 @@ public AuthenticatedUser createAuthenticatedUser(UserRecordIdentifier userRecord actionLogSvc.log( new ActionLogRecord(ActionLogRecord.ActionType.Auth, "createUser") .setInfo(authenticatedUser.getIdentifier())); + + 
authenticatedUser.initialize(); return authenticatedUser; } @@ -938,4 +940,14 @@ public List getWorkflowCommentsByAuthenticatedUser(Authenticat return query.getResultList(); } + public ApiToken getValidApiTokenForUser(AuthenticatedUser user) { + // Reuse the stored token unless it is missing or has already expired: + ApiToken apiToken = findApiTokenByUser(user); + if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + apiToken = generateApiTokenForUser(user); + logger.fine("Created apiToken for user: " + user.getIdentifier()); + } + return apiToken; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java index 2cd28d9aac9..9fdfce2f1a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java @@ -148,7 +148,7 @@ void prePersist() { } @PostLoad - void postLoad() { + public void initialize() { mutedNotificationsSet = Type.tokenizeToSet(mutedNotifications); mutedEmailsSet = Type.tokenizeToSet(mutedEmails); } diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index 6b82a665c17..a5ba9a00bd2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -57,8 +57,10 @@ import javax.inject.Named; import javax.servlet.http.HttpServletRequest; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; +import java.io.File; import java.io.FileReader; import java.io.IOException; import java.sql.Timestamp; @@ -79,7 +81,7 @@ @Dependent public class 
FileRecordJobListener implements ItemReadListener, StepListener, 
JobListener { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; private static final UserNotification.Type notifyType = UserNotification.Type.FILESYSTEMIMPORT; @@ -190,7 +192,7 @@ public void beforeJob() throws Exception { // if mode = REPLACE, remove all filemetadata from the dataset version and start fresh if (mode.equalsIgnoreCase(ImportMode.REPLACE.name())) { try { - DatasetVersion workingVersion = dataset.getEditVersion(); + DatasetVersion workingVersion = dataset.getOrCreateEditVersion(); List fileMetadataList = workingVersion.getFileMetadatas(); jobLogger.log(Level.INFO, "Removing any existing file metadata since mode = REPLACE"); for (FileMetadata fmd : fileMetadataList) { @@ -433,8 +435,10 @@ private void loadChecksumManifest() { manifest = checksumManifest; getJobLogger().log(Level.INFO, "Checksum manifest = " + manifest + " (FileSystemImportJob.xml property)"); } - // construct full path - String manifestAbsolutePath = System.getProperty("dataverse.files.directory") + + // Construct full path - retrieve base dir via MPCONFIG. 
+ // (Has sane default /tmp/dataverse from META-INF/microprofile-config.properties) + String manifestAbsolutePath = JvmSettings.FILES_DIRECTORY.lookup() + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index b3d3a7107a6..a4f8ffd2378 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -24,6 +24,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.filefilter.NotFileFilter; import org.apache.commons.io.filefilter.WildcardFileFilter; @@ -54,7 +55,7 @@ @Dependent public class FileRecordReader extends AbstractItemReader { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; @Inject JobContext jobContext; @@ -96,9 +97,11 @@ public void init() { @Override public void open(Serializable checkpoint) throws Exception { - - directory = new File(System.getProperty("dataverse.files.directory") - + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); + + // Retrieve via MPCONFIG. 
Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String baseDir = JvmSettings.FILES_DIRECTORY.lookup(); + + directory = new File(baseDir + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); // TODO: // The above goes directly to the filesystem directory configured by the // old "dataverse.files.directory" JVM option (otherwise used for temp diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java index 4a778dc7abb..a2f76ca953d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java @@ -154,8 +154,8 @@ public static Logger getJobLogger(String jobId) { try { Logger jobLogger = Logger.getLogger("job-"+jobId); FileHandler fh; - String logDir = System.getProperty("com.sun.aas.instanceRoot") + System.getProperty("file.separator") - + "logs" + System.getProperty("file.separator") + "batch-jobs" + System.getProperty("file.separator"); + String logDir = System.getProperty("com.sun.aas.instanceRoot") + File.separator + + "logs" + File.separator + "batch-jobs" + File.separator; checkCreateLogDirectory( logDir ); fh = new FileHandler(logDir + "job-" + jobId + ".log"); logger.log(Level.INFO, "JOB LOG: " + logDir + "job-" + jobId + ".log"); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index d5f00b9868f..8ee3f0cf53c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -33,9 +33,11 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; +import java.util.function.Predicate; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; +import 
java.util.stream.Collectors; // Dataverse imports: import edu.harvard.iq.dataverse.DataFile; @@ -683,4 +685,56 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { } return true; } + + private List<String> listAllFiles() throws IOException { + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This FileAccessIO object hasn't been properly initialized."); + } + + Path datasetDirectoryPath = Paths.get(dataset.getAuthorityForFileStorage(), dataset.getIdentifierForFileStorage()); + if (datasetDirectoryPath == null) { + throw new IOException("Could not determine the filesystem directory of the dataset."); + } + + List<String> res = new ArrayList<>(); + + // try-with-resources closes the directory stream even when the + // iteration below fails part-way through: + try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(Paths.get(this.getFilesRootDirectory(), datasetDirectoryPath.toString()))) { + for (Path filePath : dirStream) { + res.add(filePath.getFileName().toString()); + } + } + + return res; + } + + private void deleteFile(String fileName) throws IOException { + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This FileAccessIO object hasn't been properly initialized."); + } + + Path datasetDirectoryPath = Paths.get(dataset.getAuthorityForFileStorage(), dataset.getIdentifierForFileStorage()); + if (datasetDirectoryPath == null) { + throw new IOException("Could not determine the filesystem directory of the dataset."); + } + + Path p = Paths.get(this.getFilesRootDirectory(), datasetDirectoryPath.toString(), fileName); + Files.delete(p); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } + for (String f : toDelete) { + this.deleteFile(f); + } + return toDelete; + } + } diff 
--git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index c9796d24b27..be6f9df0254 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -14,6 +14,7 @@ import java.nio.channels.WritableByteChannel; import java.nio.file.Path; import java.util.List; +import java.util.function.Predicate; import java.util.logging.Logger; /** @@ -159,5 +160,9 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); } - + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index c8e42349318..66c6a4cc2ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -24,6 +24,7 @@ import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; import java.util.List; +import java.util.function.Predicate; import java.util.logging.Logger; import org.apache.http.Header; @@ -630,5 +631,9 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { public static String getBaseStoreIdFor(String driverId) { return System.getProperty("dataverse.files."
+ driverId + ".base-store"); } - + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 3c9cef04980..f396b07d788 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -60,7 +60,10 @@ import java.util.HashMap; import java.util.List; import java.util.Random; +import java.util.function.Predicate; import java.util.logging.Logger; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; @@ -1306,5 +1309,75 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { return true; } + private List listAllFiles() throws IOException { + if (!this.canWrite()) { + open(); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This S3AccessIO object hasn't been properly initialized."); + } + String prefix = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + + List ret = new ArrayList<>(); + ListObjectsRequest req = new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix); + ObjectListing storedFilesList = null; + try { + storedFilesList = s3.listObjects(req); + } catch (SdkClientException sce) { + throw new IOException ("S3 listObjects: failed to get a listing for " + prefix); + } + if (storedFilesList == null) { + return ret; + } + List storedFilesSummary = storedFilesList.getObjectSummaries(); + try { + while (storedFilesList.isTruncated()) { + logger.fine("S3 listObjects: going to next page of list"); + storedFilesList = s3.listNextBatchOfObjects(storedFilesList); + if (storedFilesList != null) { + 
storedFilesSummary.addAll(storedFilesList.getObjectSummaries()); + } + } + } catch (AmazonClientException ase) { + //logger.warning("Caught an AmazonServiceException in S3AccessIO.listObjects(): " + ase.getMessage()); + throw new IOException("S3AccessIO: Failed to get objects for listing."); + } -} + for (S3ObjectSummary item : storedFilesSummary) { + String fileName = item.getKey().substring(prefix.length()); + ret.add(fileName); + } + return ret; + } + + private void deleteFile(String fileName) throws IOException { + if (!this.canWrite()) { + open(); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This S3AccessIO object hasn't been properly initialized."); + } + String prefix = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + + try { + DeleteObjectRequest dor = new DeleteObjectRequest(bucketName, prefix + fileName); + s3.deleteObject(dor); + } catch (AmazonClientException ase) { + logger.warning("S3AccessIO: Unable to delete object " + ase.getMessage()); + } + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } + for (String f : toDelete) { + this.deleteFile(f); + } + return toDelete; + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 90e4a54dbe8..bfd5c5f0d8f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -39,6 +39,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -622,4 +623,6 @@ protected static boolean usesStandardNamePattern(String 
identifier) { return m.find(); } + public abstract List cleanUp(Predicate filter, boolean dryRun) throws IOException; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index b1725b040a3..6c84009de3e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -22,7 +22,10 @@ import java.util.Formatter; import java.util.List; import java.util.Properties; +import java.util.function.Predicate; import java.util.logging.Logger; +import java.util.stream.Collectors; + import javax.crypto.Mac; import javax.crypto.spec.SecretKeySpec; import org.javaswift.joss.client.factory.AccountFactory; @@ -864,13 +867,16 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException } } + private String getSwiftContainerName(Dataset dataset) { + String authorityNoSlashes = dataset.getAuthorityForFileStorage().replace("/", swiftFolderPathSeparator); + return dataset.getProtocolForFileStorage() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + + swiftFolderPathSeparator + dataset.getIdentifierForFileStorage(); + } + @Override public String getSwiftContainerName() { if (dvObject instanceof DataFile) { - String authorityNoSlashes = this.getDataFile().getOwner().getAuthorityForFileStorage().replace("/", swiftFolderPathSeparator); - return this.getDataFile().getOwner().getProtocolForFileStorage() + swiftFolderPathSeparator - + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + - swiftFolderPathSeparator + this.getDataFile().getOwner().getIdentifierForFileStorage(); + return getSwiftContainerName(this.getDataFile().getOwner()); } return null; } @@ -893,5 +899,59 @@ public static String calculateRFC2104HMAC(String data, String key) mac.init(signingKey); return toHexString(mac.doFinal(data.getBytes())); } - + + private List 
listAllFiles() throws IOException { + if (!this.canWrite()) { + open(DataAccessOption.WRITE_ACCESS); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This SwiftAccessIO object hasn't been properly initialized."); + } + String prefix = getSwiftContainerName(dataset) + swiftFolderPathSeparator; + + Collection items; + String lastItemName = null; + List ret = new ArrayList<>(); + + while ((items = this.swiftContainer.list(prefix, lastItemName, LIST_PAGE_LIMIT)) != null && items.size() > 0) { + for (StoredObject item : items) { + lastItemName = item.getName().substring(prefix.length()); + ret.add(lastItemName); + } + } + + return ret; + } + + private void deleteFile(String fileName) throws IOException { + if (!this.canWrite()) { + open(DataAccessOption.WRITE_ACCESS); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This SwiftAccessIO object hasn't been properly initialized."); + } + String prefix = getSwiftContainerName(dataset) + swiftFolderPathSeparator; + + StoredObject fileObject = this.swiftContainer.getObject(prefix + fileName); + + if (!fileObject.exists()) { + throw new FileNotFoundException("SwiftAccessIO/Direct Access: " + fileName + " does not exist"); + } + + fileObject.delete(); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } + for (String f : toDelete) { + this.deleteFile(f); + } + return toDelete; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java index 0b6b37af9f0..782f7f3a52d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java @@ -365,8 +365,8 @@ 
public void subsetFile(String infile, String outfile, List columns, Lon public void subsetFile(String infile, String outfile, List columns, Long numCases, String delimiter) { - try { - subsetFile(new FileInputStream(new File(infile)), outfile, columns, numCases, delimiter); + try (FileInputStream fis = new FileInputStream(new File(infile))){ + subsetFile(fis, outfile, columns, numCases, delimiter); } catch (IOException ex) { throw new RuntimeException("Could not open file "+infile); } @@ -375,33 +375,28 @@ public void subsetFile(String infile, String outfile, List columns, Lon public void subsetFile(InputStream in, String outfile, List columns, Long numCases, String delimiter) { - try { - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); - - BufferedWriter out = new BufferedWriter(new FileWriter(outfile)); - for (long caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split(delimiter,-1); - List ln = new ArrayList(); - for (Integer i : columns) { - ln.add(line[i]); + try (Scanner scanner = new Scanner(in); BufferedWriter out = new BufferedWriter(new FileWriter(outfile))) { + scanner.useDelimiter("\\n"); + + for (long caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split(delimiter,-1); + List ln = new ArrayList(); + for (Integer i : columns) { + ln.add(line[i]); + } + out.write(StringUtils.join(ln,"\t")+"\n"); + } else { + throw new RuntimeException("Tab file has fewer rows than the determined number of cases."); } - out.write(StringUtils.join(ln,"\t")+"\n"); - } else { - throw new RuntimeException("Tab file has fewer rows than the determined number of cases."); } - } - while (scanner.hasNext()) { - if (!"".equals(scanner.next()) ) { - throw new RuntimeException("Tab file has extra nonempty rows than the determined number of cases."); + while (scanner.hasNext()) { + if (!"".equals(scanner.next()) ) { + throw new 
RuntimeException("Tab file has extra nonempty rows than the determined number of cases."); + } } - } - - scanner.close(); - out.close(); } catch (FileNotFoundException e) { e.printStackTrace(); @@ -418,50 +413,48 @@ public void subsetFile(InputStream in, String outfile, List columns, Lo public static Double[] subsetDoubleVector(InputStream in, int column, int numCases) { Double[] retVector = new Double[numCases]; - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); - - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - - // Verified: new Double("nan") works correctly, - // resulting in Double.NaN; - // Double("[+-]Inf") doesn't work however; - // (the constructor appears to be expecting it - // to be spelled as "Infinity", "-Infinity", etc. - if ("inf".equalsIgnoreCase(line[column]) || "+inf".equalsIgnoreCase(line[column])) { - retVector[caseIndex] = java.lang.Double.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(line[column])) { - retVector[caseIndex] = java.lang.Double.NEGATIVE_INFINITY; - } else if (line[column] == null || line[column].equals("")) { - // missing value: - retVector[caseIndex] = null; - } else { - try { - retVector[caseIndex] = new Double(line[column]); - } catch (NumberFormatException ex) { - retVector[caseIndex] = null; // missing value + try (Scanner scanner = new Scanner(in)) { + scanner.useDelimiter("\\n"); + + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + + // Verified: new Double("nan") works correctly, + // resulting in Double.NaN; + // Double("[+-]Inf") doesn't work however; + // (the constructor appears to be expecting it + // to be spelled as "Infinity", "-Infinity", etc. 
+ if ("inf".equalsIgnoreCase(line[column]) || "+inf".equalsIgnoreCase(line[column])) { + retVector[caseIndex] = java.lang.Double.POSITIVE_INFINITY; + } else if ("-inf".equalsIgnoreCase(line[column])) { + retVector[caseIndex] = java.lang.Double.NEGATIVE_INFINITY; + } else if (line[column] == null || line[column].equals("")) { + // missing value: + retVector[caseIndex] = null; + } else { + try { + retVector[caseIndex] = new Double(line[column]); + } catch (NumberFormatException ex) { + retVector[caseIndex] = null; // missing value + } } - } - } else { - scanner.close(); - throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); + } else { + throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); + } } - } - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - scanner.close(); - throw new RuntimeException("Column " + column + ": tab file has more nonempty rows than the stored number of cases (" + numCases + ")! current index: " + tailIndex + ", line: " + nextLine); + int tailIndex = numCases; + while (scanner.hasNext()) { + String nextLine = scanner.next(); + if (!"".equals(nextLine)) { + throw new RuntimeException("Column " + column + ": tab file has more nonempty rows than the stored number of cases (" + numCases + ")! 
current index: " + tailIndex + ", line: " + nextLine); + } + tailIndex++; } - tailIndex++; - } - scanner.close(); + } return retVector; } @@ -472,48 +465,46 @@ public static Double[] subsetDoubleVector(InputStream in, int column, int numCas */ public static Float[] subsetFloatVector(InputStream in, int column, int numCases) { Float[] retVector = new Float[numCases]; - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); - - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - // Verified: new Float("nan") works correctly, - // resulting in Float.NaN; - // Float("[+-]Inf") doesn't work however; - // (the constructor appears to be expecting it - // to be spelled as "Infinity", "-Infinity", etc. - if ("inf".equalsIgnoreCase(line[column]) || "+inf".equalsIgnoreCase(line[column])) { - retVector[caseIndex] = java.lang.Float.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(line[column])) { - retVector[caseIndex] = java.lang.Float.NEGATIVE_INFINITY; - } else if (line[column] == null || line[column].equals("")) { - // missing value: - retVector[caseIndex] = null; - } else { - try { - retVector[caseIndex] = new Float(line[column]); - } catch (NumberFormatException ex) { - retVector[caseIndex] = null; // missing value + try (Scanner scanner = new Scanner(in)) { + scanner.useDelimiter("\\n"); + + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + // Verified: new Float("nan") works correctly, + // resulting in Float.NaN; + // Float("[+-]Inf") doesn't work however; + // (the constructor appears to be expecting it + // to be spelled as "Infinity", "-Infinity", etc. 
+ if ("inf".equalsIgnoreCase(line[column]) || "+inf".equalsIgnoreCase(line[column])) { + retVector[caseIndex] = java.lang.Float.POSITIVE_INFINITY; + } else if ("-inf".equalsIgnoreCase(line[column])) { + retVector[caseIndex] = java.lang.Float.NEGATIVE_INFINITY; + } else if (line[column] == null || line[column].equals("")) { + // missing value: + retVector[caseIndex] = null; + } else { + try { + retVector[caseIndex] = new Float(line[column]); + } catch (NumberFormatException ex) { + retVector[caseIndex] = null; // missing value + } } + } else { + throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); } - } else { - scanner.close(); - throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); } - } - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - scanner.close(); - throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); + int tailIndex = numCases; + while (scanner.hasNext()) { + String nextLine = scanner.next(); + if (!"".equals(nextLine)) { + throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! 
current index: "+tailIndex+", line: "+nextLine); + } + tailIndex++; } - tailIndex++; - } - scanner.close(); + } return retVector; } @@ -524,34 +515,32 @@ public static Float[] subsetFloatVector(InputStream in, int column, int numCases */ public static Long[] subsetLongVector(InputStream in, int column, int numCases) { Long[] retVector = new Long[numCases]; - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); + try (Scanner scanner = new Scanner(in)) { + scanner.useDelimiter("\\n"); - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - try { - retVector[caseIndex] = new Long(line[column]); - } catch (NumberFormatException ex) { - retVector[caseIndex] = null; // assume missing value + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + try { + retVector[caseIndex] = new Long(line[column]); + } catch (NumberFormatException ex) { + retVector[caseIndex] = null; // assume missing value + } + } else { + throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); } - } else { - scanner.close(); - throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); } - } - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - scanner.close(); - throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); + int tailIndex = numCases; + while (scanner.hasNext()) { + String nextLine = scanner.next(); + if (!"".equals(nextLine)) { + throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! 
current index: "+tailIndex+", line: "+nextLine); + } + tailIndex++; } - tailIndex++; - } - scanner.close(); + } return retVector; } @@ -562,75 +551,72 @@ public static Long[] subsetLongVector(InputStream in, int column, int numCases) */ public static String[] subsetStringVector(InputStream in, int column, int numCases) { String[] retVector = new String[numCases]; - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); + try (Scanner scanner = new Scanner(in)) { + scanner.useDelimiter("\\n"); - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - retVector[caseIndex] = line[column]; - - - if ("".equals(line[column])) { - // An empty string is a string missing value! - // An empty string in quotes is an empty string! - retVector[caseIndex] = null; - } else { - // Strip the outer quotes: - line[column] = line[column].replaceFirst("^\\\"", ""); - line[column] = line[column].replaceFirst("\\\"$", ""); - - // We need to restore the special characters that - // are stored in tab files escaped - quotes, new lines - // and tabs. Before we do that however, we need to - // take care of any escaped backslashes stored in - // the tab file. I.e., "foo\t" should be transformed - // to "foo"; but "foo\\t" should be transformed - // to "foo\t". This way new lines and tabs that were - // already escaped in the original data are not - // going to be transformed to unescaped tab and - // new line characters! - String[] splitTokens = line[column].split(Matcher.quoteReplacement("\\\\"), -2); - - // (note that it's important to use the 2-argument version - // of String.split(), and set the limit argument to a - // negative value; otherwise any trailing backslashes - // are lost.) 
- for (int i = 0; i < splitTokens.length; i++) { - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); - } - // TODO: - // Make (some of?) the above optional; for ex., we - // do need to restore the newlines when calculating UNFs; - // But if we are subsetting these vectors in order to - // create a new tab-delimited file, they will - // actually break things! -- L.A. Jul. 28 2014 + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + retVector[caseIndex] = line[column]; - line[column] = StringUtils.join(splitTokens, '\\'); + if ("".equals(line[column])) { + // An empty string is a string missing value! + // An empty string in quotes is an empty string! + retVector[caseIndex] = null; + } else { + // Strip the outer quotes: + line[column] = line[column].replaceFirst("^\\\"", ""); + line[column] = line[column].replaceFirst("\\\"$", ""); + + // We need to restore the special characters that + // are stored in tab files escaped - quotes, new lines + // and tabs. Before we do that however, we need to + // take care of any escaped backslashes stored in + // the tab file. I.e., "foo\t" should be transformed + // to "foo"; but "foo\\t" should be transformed + // to "foo\t". This way new lines and tabs that were + // already escaped in the original data are not + // going to be transformed to unescaped tab and + // new line characters! + String[] splitTokens = line[column].split(Matcher.quoteReplacement("\\\\"), -2); + + // (note that it's important to use the 2-argument version + // of String.split(), and set the limit argument to a + // negative value; otherwise any trailing backslashes + // are lost.) 
+ for (int i = 0; i < splitTokens.length; i++) { + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); + } + // TODO: + // Make (some of?) the above optional; for ex., we + // do need to restore the newlines when calculating UNFs; + // But if we are subsetting these vectors in order to + // create a new tab-delimited file, they will + // actually break things! -- L.A. Jul. 28 2014 - retVector[caseIndex] = line[column]; - } + line[column] = StringUtils.join(splitTokens, '\\'); - } else { - scanner.close(); - throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); + retVector[caseIndex] = line[column]; + } + + } else { + throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); + } } - } - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - scanner.close(); - throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); + int tailIndex = numCases; + while (scanner.hasNext()) { + String nextLine = scanner.next(); + if (!"".equals(nextLine)) { + throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! 
current index: "+tailIndex+", line: "+nextLine); + } + tailIndex++; } - tailIndex++; - } - scanner.close(); + } return retVector; } @@ -643,42 +629,40 @@ public static String[] subsetStringVector(InputStream in, int column, int numCas */ public static Double[][] subsetDoubleVectors(InputStream in, Set columns, int numCases) throws IOException { Double[][] retVector = new Double[columns.size()][numCases]; - Scanner scanner = new Scanner(in); - scanner.useDelimiter("\\n"); - - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - int j = 0; - for (Integer i : columns) { - try { - // TODO: verify that NaN and +-Inf are going to be - // handled correctly here! -- L.A. - // NO, "+-Inf" is not handled correctly; see the - // comment further down below. - retVector[j][caseIndex] = new Double(line[i]); - } catch (NumberFormatException ex) { - retVector[j][caseIndex] = null; // missing value + try (Scanner scanner = new Scanner(in)) { + scanner.useDelimiter("\\n"); + + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + int j = 0; + for (Integer i : columns) { + try { + // TODO: verify that NaN and +-Inf are going to be + // handled correctly here! -- L.A. + // NO, "+-Inf" is not handled correctly; see the + // comment further down below. 
+ retVector[j][caseIndex] = new Double(line[i]); + } catch (NumberFormatException ex) { + retVector[j][caseIndex] = null; // missing value + } + j++; } - j++; + } else { + throw new IOException("Tab file has fewer rows than the stored number of cases!"); } - } else { - scanner.close(); - throw new IOException("Tab file has fewer rows than the stored number of cases!"); } - } - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - scanner.close(); - throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); + int tailIndex = numCases; + while (scanner.hasNext()) { + String nextLine = scanner.next(); + if (!"".equals(nextLine)) { + throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); + } + tailIndex++; } - tailIndex++; - } - scanner.close(); + } return retVector; } @@ -839,237 +823,238 @@ public Object[] subsetObjectVector(File tabfile, int column, int varcount, int c columnOffset = varcount * 8; columnLength = columnEndOffsets[0] - varcount * 8; } + int caseindex = 0; - FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ)); - fc.position(columnOffset); - int MAX_COLUMN_BUFFER = 8192; - - ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER); - - if (columnLength < MAX_COLUMN_BUFFER) { - in.limit((int)(columnLength)); - } - - long bytesRead = 0; - long bytesReadTotal = 0; - int caseindex = 0; - int byteoffset = 0; - byte[] leftover = null; - - while (bytesReadTotal < columnLength) { - bytesRead = fc.read(in); - byte[] columnBytes = in.array(); - int bytecount = 0; + try (FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), + StandardOpenOption.READ))) { + fc.position(columnOffset); + int MAX_COLUMN_BUFFER = 8192; - - while 
(bytecount < bytesRead) { - if (columnBytes[bytecount] == '\n') { - /* - String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); - - if (leftover != null) { - String leftoverString = new String (leftover, "UTF8"); - token = leftoverString + token; - leftover = null; - } - */ - /* - * Note that the way I was doing it at first - above - - * was not quite the correct way - because I was creating UTF8 - * strings from the leftover bytes, and the bytes in the - * current buffer *separately*; which means, if a multi-byte - * UTF8 character got split in the middle between one buffer - * and the next, both chunks of it would become junk - * characters, on each side! - * The correct way of doing it, of course, is to create a - * merged byte buffer, and then turn it into a UTF8 string. - * -- L.A. 4.0 - */ - String token = null; - - if (leftover == null) { - token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); - } else { - byte[] merged = new byte[leftover.length + bytecount-byteoffset]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount-byteoffset); - token = new String (merged, "UTF8"); - leftover = null; - merged = null; - } - - if (isString) { - if ("".equals(token)) { - // An empty string is a string missing value! - // An empty string in quotes is an empty string! 
- retVector[caseindex] = null; + ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER); + + if (columnLength < MAX_COLUMN_BUFFER) { + in.limit((int) (columnLength)); + } + + long bytesRead = 0; + long bytesReadTotal = 0; + + int byteoffset = 0; + byte[] leftover = null; + + while (bytesReadTotal < columnLength) { + bytesRead = fc.read(in); + byte[] columnBytes = in.array(); + int bytecount = 0; + + while (bytecount < bytesRead) { + if (columnBytes[bytecount] == '\n') { + /* + String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); + + if (leftover != null) { + String leftoverString = new String (leftover, "UTF8"); + token = leftoverString + token; + leftover = null; + } + */ + /* + * Note that the way I was doing it at first - above - + * was not quite the correct way - because I was creating UTF8 + * strings from the leftover bytes, and the bytes in the + * current buffer *separately*; which means, if a multi-byte + * UTF8 character got split in the middle between one buffer + * and the next, both chunks of it would become junk + * characters, on each side! + * The correct way of doing it, of course, is to create a + * merged byte buffer, and then turn it into a UTF8 string. + * -- L.A. 4.0 + */ + String token = null; + + if (leftover == null) { + token = new String(columnBytes, byteoffset, bytecount - byteoffset, "UTF8"); } else { - // Strip the outer quotes: - token = token.replaceFirst("^\\\"", ""); - token = token.replaceFirst("\\\"$", ""); - - // We need to restore the special characters that - // are stored in tab files escaped - quotes, new lines - // and tabs. Before we do that however, we need to - // take care of any escaped backslashes stored in - // the tab file. I.e., "foo\t" should be transformed - // to "foo"; but "foo\\t" should be transformed - // to "foo\t". This way new lines and tabs that were - // already escaped in the original data are not - // going to be transformed to unescaped tab and - // new line characters! 
- - String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2); - - // (note that it's important to use the 2-argument version - // of String.split(), and set the limit argument to a - // negative value; otherwise any trailing backslashes - // are lost.) - - for (int i = 0; i < splitTokens.length; i++) { - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); - } - // TODO: - // Make (some of?) the above optional; for ex., we - // do need to restore the newlines when calculating UNFs; - // But if we are subsetting these vectors in order to - // create a new tab-delimited file, they will - // actually break things! -- L.A. Jul. 28 2014 - - token = StringUtils.join(splitTokens, '\\'); - - // "compatibility mode" - a hack, to be able to produce - // unfs identical to those produced by the "early" - // unf5 jar; will be removed in production 4.0. - // -- L.A. (TODO: ...) - if (compatmode && !"".equals(token)) { - if (token.length() > 128) { - if ("".equals(token.trim())) { - // don't ask... - token = token.substring(0, 129); + byte[] merged = new byte[leftover.length + bytecount - byteoffset]; + + System.arraycopy(leftover, 0, merged, 0, leftover.length); + System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount - byteoffset); + token = new String(merged, "UTF8"); + leftover = null; + merged = null; + } + + if (isString) { + if ("".equals(token)) { + // An empty string is a string missing value! + // An empty string in quotes is an empty string! 
+ retVector[caseindex] = null; + } else { + // Strip the outer quotes: + token = token.replaceFirst("^\\\"", ""); + token = token.replaceFirst("\\\"$", ""); + + // We need to restore the special characters that + // are stored in tab files escaped - quotes, new lines + // and tabs. Before we do that however, we need to + // take care of any escaped backslashes stored in + // the tab file. I.e., "foo\t" should be transformed + // to "foo"; but "foo\\t" should be transformed + // to "foo\t". This way new lines and tabs that were + // already escaped in the original data are not + // going to be transformed to unescaped tab and + // new line characters! + + String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2); + + // (note that it's important to use the 2-argument version + // of String.split(), and set the limit argument to a + // negative value; otherwise any trailing backslashes + // are lost.) + + for (int i = 0; i < splitTokens.length; i++) { + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); + splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); + } + // TODO: + // Make (some of?) the above optional; for ex., we + // do need to restore the newlines when calculating UNFs; + // But if we are subsetting these vectors in order to + // create a new tab-delimited file, they will + // actually break things! -- L.A. Jul. 28 2014 + + token = StringUtils.join(splitTokens, '\\'); + + // "compatibility mode" - a hack, to be able to produce + // unfs identical to those produced by the "early" + // unf5 jar; will be removed in production 4.0. + // -- L.A. (TODO: ...) + if (compatmode && !"".equals(token)) { + if (token.length() > 128) { + if ("".equals(token.trim())) { + // don't ask... 
+ token = token.substring(0, 129); + } else { + token = token.substring(0, 128); + // token = String.format(loc, "%.128s", token); + token = token.trim(); + // dbgLog.info("formatted and trimmed: "+token); + } } else { - token = token.substring(0, 128); - //token = String.format(loc, "%.128s", token); - token = token.trim(); - //dbgLog.info("formatted and trimmed: "+token); + if ("".equals(token.trim())) { + // again, don't ask; + // - this replicates some bugginness + // that happens inside unf5; + token = "null"; + } else { + token = token.trim(); + } } + } + + retVector[caseindex] = token; + } + } else if (isDouble) { + try { + // TODO: verify that NaN and +-Inf are + // handled correctly here! -- L.A. + // Verified: new Double("nan") works correctly, + // resulting in Double.NaN; + // Double("[+-]Inf") doesn't work however; + // (the constructor appears to be expecting it + // to be spelled as "Infinity", "-Infinity", etc. + if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { + retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY; + } else if ("-inf".equalsIgnoreCase(token)) { + retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY; + } else if (token == null || token.equals("")) { + // missing value: + retVector[caseindex] = null; } else { - if ("".equals(token.trim())) { - // again, don't ask; - // - this replicates some bugginness - // that happens inside unf5; - token = "null"; - } else { - token = token.trim(); - } + retVector[caseindex] = new Double(token); } + } catch (NumberFormatException ex) { + dbgLog.warning("NumberFormatException thrown for " + token + " as Double"); + + retVector[caseindex] = null; // missing value + // TODO: ? } - - retVector[caseindex] = token; - } - } else if (isDouble) { - try { - // TODO: verify that NaN and +-Inf are - // handled correctly here! -- L.A. 
- // Verified: new Double("nan") works correctly, - // resulting in Double.NaN; - // Double("[+-]Inf") doesn't work however; - // (the constructor appears to be expecting it - // to be spelled as "Infinity", "-Infinity", etc. - if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Double(token); + } else if (isLong) { + try { + retVector[caseindex] = new Long(token); + } catch (NumberFormatException ex) { + retVector[caseindex] = null; // assume missing value } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for "+token+" as Double"); - - retVector[caseindex] = null; // missing value - // TODO: ? - } - } else if (isLong) { - try { - retVector[caseindex] = new Long(token); - } catch (NumberFormatException ex) { - retVector[caseindex] = null; // assume missing value - } - } else if (isFloat) { - try { - if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Float(token); + } else if (isFloat) { + try { + if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { + retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY; + } else if ("-inf".equalsIgnoreCase(token)) { + retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY; + } else if (token == null || token.equals("")) { + // missing value: + retVector[caseindex] = null; + } else { + retVector[caseindex] = new Float(token); + 
} + } catch (NumberFormatException ex) { + dbgLog.warning("NumberFormatException thrown for " + token + " as Float"); + retVector[caseindex] = null; // assume missing value (TODO: ?) } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for "+token+" as Float"); - retVector[caseindex] = null; // assume missing value (TODO: ?) } - } - caseindex++; - - if (bytecount == bytesRead - 1) { - byteoffset = 0; - } else { - byteoffset = bytecount + 1; - } - } else { - if (bytecount == bytesRead - 1) { - // We've reached the end of the buffer; - // This means we'll save whatever unused bytes left in - // it - i.e., the bytes between the last new line - // encountered and the end - in the leftover buffer. - - // *EXCEPT*, there may be a case of a very long String - // that is actually longer than MAX_COLUMN_BUFFER, in - // which case it is possible that we've read through - // an entire buffer of bytes without finding any - // new lines... in this case we may need to add this - // entire byte buffer to an already existing leftover - // buffer! - if (leftover == null) { - leftover = new byte[(int)bytesRead - byteoffset]; - System.arraycopy(columnBytes, byteoffset, leftover, 0, (int)bytesRead - byteoffset); + caseindex++; + + if (bytecount == bytesRead - 1) { + byteoffset = 0; } else { - if (byteoffset != 0) { + byteoffset = bytecount + 1; + } + } else { + if (bytecount == bytesRead - 1) { + // We've reached the end of the buffer; + // This means we'll save whatever unused bytes left in + // it - i.e., the bytes between the last new line + // encountered and the end - in the leftover buffer. + + // *EXCEPT*, there may be a case of a very long String + // that is actually longer than MAX_COLUMN_BUFFER, in + // which case it is possible that we've read through + // an entire buffer of bytes without finding any + // new lines... in this case we may need to add this + // entire byte buffer to an already existing leftover + // buffer! 
+ if (leftover == null) { + leftover = new byte[(int) bytesRead - byteoffset]; + System.arraycopy(columnBytes, byteoffset, leftover, 0, (int) bytesRead - byteoffset); + } else { + if (byteoffset != 0) { throw new IOException("Reached the end of the byte buffer, with some leftover left from the last read; yet the offset is not zero!"); + } + byte[] merged = new byte[leftover.length + (int) bytesRead]; + + System.arraycopy(leftover, 0, merged, 0, leftover.length); + System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int) bytesRead); + // leftover = null; + leftover = merged; + merged = null; } - byte[] merged = new byte[leftover.length + (int)bytesRead]; + byteoffset = 0; - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int)bytesRead); - //leftover = null; - leftover = merged; - merged = null; } - byteoffset = 0; - } + bytecount++; + } + + bytesReadTotal += bytesRead; + in.clear(); + if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) { + in.limit((int) (columnLength - bytesReadTotal)); } - bytecount++; - } - - bytesReadTotal += bytesRead; - in.clear(); - if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) { - in.limit((int)(columnLength - bytesReadTotal)); } + } - - fc.close(); if (caseindex != casecount) { throw new IOException("Faile to read "+casecount+" tokens for column "+column); @@ -1080,31 +1065,31 @@ public Object[] subsetObjectVector(File tabfile, int column, int varcount, int c } private long[] extractColumnOffsets (File rotatedImageFile, int varcount, int casecount) throws IOException { - BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile)); - - byte[] offsetHeader = new byte[varcount * 8]; long[] byteOffsets = new long[varcount]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated 
file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - //System.out.println(byteOffsets[varindex]); + try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile))) { + + byte[] offsetHeader = new byte[varcount * 8]; + + int readlen = rotfileStream.read(offsetHeader); + + if (readlen != varcount * 8) { + throw new IOException("Could not read " + varcount * 8 + " header bytes from the rotated file."); + } + + for (int varindex = 0; varindex < varcount; varindex++) { + byte[] offsetBytes = new byte[8]; + System.arraycopy(offsetHeader, varindex * 8, offsetBytes, 0, 8); + + ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); + byteOffsets[varindex] = offsetByteBuffer.getLong(); + + // System.out.println(byteOffsets[varindex]); + } + } - - rotfileStream.close(); - - return byteOffsets; + + return byteOffsets; } private File getRotatedImage(File tabfile, int varcount, int casecount) throws IOException { @@ -1149,85 +1134,84 @@ private File generateRotatedImage (File tabfile, int varcount, int casecount) th // read the tab-delimited file: - FileInputStream tabfileStream = new FileInputStream(tabfile); - - Scanner scanner = new Scanner(tabfileStream); - scanner.useDelimiter("\\n"); - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - // TODO: throw an exception if there are fewer tab-delimited - // tokens than the number of variables specified. - String token = ""; - int tokensize = 0; - for (int varindex = 0; varindex < varcount; varindex++) { - // TODO: figure out the safest way to convert strings to - // bytes here. Is it going to be safer to use getBytes("UTF8")? 
- // we are already making the assumption that the values - // in the tab file are in UTF8. -- L.A. - token = line[varindex] + "\n"; - tokensize = token.getBytes().length; - if (bufferedSizes[varindex]+tokensize > MAX_COLUMN_BUFFER) { - // fill the buffer and dump its contents into the temp file: - // (do note that there may be *several* MAX_COLUMN_BUFFERs - // worth of bytes in the token!) - - int tokenoffset = 0; - - if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) { - tokenoffset = MAX_COLUMN_BUFFER-bufferedSizes[varindex]; - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset); - } // (otherwise the buffer is already full, and we should - // simply dump it into the temp file, without adding any - // extra bytes to it) - - File bufferTempFile = columnTempFiles[varindex]; - if (bufferTempFile == null) { - bufferTempFile = File.createTempFile("columnBufferFile", "bytes"); - columnTempFiles[varindex] = bufferTempFile; - } - - // *append* the contents of the buffer to the end of the - // temp file, if already exists: - BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream (bufferTempFile, true)); - outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - - // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into - // the temp file, for as long as there's more than MAX_COLUMN_BUFFER - // bytes left in the token: - - while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) { - outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - tokenoffset += MAX_COLUMN_BUFFER; + try (FileInputStream tabfileStream = new FileInputStream(tabfile); + Scanner scanner = new Scanner(tabfileStream)) { + scanner.useDelimiter("\\n"); + + for (int caseindex = 0; caseindex < casecount; caseindex++) { + if (scanner.hasNext()) { + String[] line = (scanner.next()).split("\t", -1); + // TODO: 
throw an exception if there are fewer tab-delimited + // tokens than the number of variables specified. + String token = ""; + int tokensize = 0; + for (int varindex = 0; varindex < varcount; varindex++) { + // TODO: figure out the safest way to convert strings to + // bytes here. Is it going to be safer to use getBytes("UTF8")? + // we are already making the assumption that the values + // in the tab file are in UTF8. -- L.A. + token = line[varindex] + "\n"; + tokensize = token.getBytes().length; + if (bufferedSizes[varindex] + tokensize > MAX_COLUMN_BUFFER) { + // fill the buffer and dump its contents into the temp file: + // (do note that there may be *several* MAX_COLUMN_BUFFERs + // worth of bytes in the token!) + + int tokenoffset = 0; + + if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) { + tokenoffset = MAX_COLUMN_BUFFER - bufferedSizes[varindex]; + System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset); + } // (otherwise the buffer is already full, and we should + // simply dump it into the temp file, without adding any + // extra bytes to it) + + File bufferTempFile = columnTempFiles[varindex]; + if (bufferTempFile == null) { + bufferTempFile = File.createTempFile("columnBufferFile", "bytes"); + columnTempFiles[varindex] = bufferTempFile; + } + + // *append* the contents of the buffer to the end of the + // temp file, if already exists: + try (BufferedOutputStream outputStream = new BufferedOutputStream( + new FileOutputStream(bufferTempFile, true))) { + outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER); + cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; + + // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into + // the temp file, for as long as there's more than MAX_COLUMN_BUFFER + // bytes left in the token: + + while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) { + outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER); + cachedfileSizes[varindex] += 
MAX_COLUMN_BUFFER; + tokenoffset += MAX_COLUMN_BUFFER; + } + + } + + // buffer the remaining bytes and reset the buffered + // byte counter: + + System.arraycopy(token.getBytes(), + tokenoffset, + bufferedColumns[varindex], + 0, + tokensize - tokenoffset); + + bufferedSizes[varindex] = tokensize - tokenoffset; + + } else { + // continue buffering + System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize); + bufferedSizes[varindex] += tokensize; } - - outputStream.close(); - - // buffer the remaining bytes and reset the buffered - // byte counter: - - System.arraycopy(token.getBytes(), - tokenoffset, - bufferedColumns[varindex], - 0, - tokensize - tokenoffset); - - bufferedSizes[varindex] = tokensize - tokenoffset; - - } else { - // continue buffering - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize); - bufferedSizes[varindex] += tokensize; } + } else { + throw new IOException("Tab file has fewer rows than the stored number of cases!"); } - } else { - scanner.close(); - throw new IOException("Tab file has fewer rows than the stored number of cases!"); } - } // OK, we've created the individual byte vectors of the tab file columns; @@ -1235,60 +1219,61 @@ private File generateRotatedImage (File tabfile, int varcount, int casecount) th // We now need to go through all these buffers and create the final // rotated image file. - BufferedOutputStream finalOut = new BufferedOutputStream(new FileOutputStream (new File(rotatedImageFileName))); - - // but first we should create the offset header and write it out into - // the final file; because it should be at the head, doh! - - long columnOffset = varcount * 8; - // (this is the offset of the first column vector; it is equal to the - // size of the offset header, i.e. 
varcount * 8 bytes) - - for (int varindex = 0; varindex < varcount; varindex++) { - long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex]; - columnOffset+=totalColumnBytes; - //totalColumnBytes; - byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array(); - System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8); - } - - finalOut.write(offsetHeader, 0, varcount * 8); - - for (int varindex = 0; varindex < varcount; varindex++) { - long cachedBytesRead = 0; - - // check if there is a cached temp file: - - File cachedTempFile = columnTempFiles[varindex]; - if (cachedTempFile != null) { - byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER]; - BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile)); - int readlen = 0; - while ((readlen = cachedIn.read(cachedBytes)) > -1) { - finalOut.write(cachedBytes, 0, readlen); - cachedBytesRead += readlen; - } - cachedIn.close(); - // delete the temp file: - cachedTempFile.delete(); - + try (BufferedOutputStream finalOut = new BufferedOutputStream( + new FileOutputStream(new File(rotatedImageFileName)))) { + + // but first we should create the offset header and write it out into + // the final file; because it should be at the head, doh! + + long columnOffset = varcount * 8; + // (this is the offset of the first column vector; it is equal to the + // size of the offset header, i.e. 
varcount * 8 bytes) + + for (int varindex = 0; varindex < varcount; varindex++) { + long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex]; + columnOffset += totalColumnBytes; + // totalColumnBytes; + byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array(); + System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8); } - - if (cachedBytesRead != cachedfileSizes[varindex]) { - finalOut.close(); - throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+ + + finalOut.write(offsetHeader, 0, varcount * 8); + + for (int varindex = 0; varindex < varcount; varindex++) { + long cachedBytesRead = 0; + + // check if there is a cached temp file: + + File cachedTempFile = columnTempFiles[varindex]; + if (cachedTempFile != null) { + byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER]; + try (BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile))) { + int readlen = 0; + while ((readlen = cachedIn.read(cachedBytes)) > -1) { + finalOut.write(cachedBytes, 0, readlen); + cachedBytesRead += readlen; + } + } + + // delete the temp file: + cachedTempFile.delete(); + + } + + if (cachedBytesRead != cachedfileSizes[varindex]) { + throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+ cachedfileSizes[varindex] + " bytes expected, "+cachedBytesRead+" read."); + } + + // then check if there are any bytes buffered for this column: + + if (bufferedSizes[varindex] > 0) { + finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]); + } + } - - // then check if there are any bytes buffered for this column: - - if (bufferedSizes[varindex] > 0) { - finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]); - } - } - finalOut.close(); return new File(rotatedImageFileName); } @@ -1305,88 +1290,87 @@ private File generateRotatedImage (File tabfile, int varcount, int casecount) th */ private 
void reverseRotatedImage (File rotfile, int varcount, int casecount) throws IOException { // open the file, read in the offset header: - BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile)); - - byte[] offsetHeader = new byte[varcount * 8]; - long[] byteOffsets = new long[varcount]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - //System.out.println(byteOffsets[varindex]); - } - - String [][] reversedMatrix = new String[casecount][varcount]; - - long offset = varcount * 8; - byte[] columnBytes; - - for (int varindex = 0; varindex < varcount; varindex++) { - long columnLength = byteOffsets[varindex] - offset; + try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile))) { + byte[] offsetHeader = new byte[varcount * 8]; + long[] byteOffsets = new long[varcount]; + int readlen = rotfileStream.read(offsetHeader); - - columnBytes = new byte[(int)columnLength]; - readlen = rotfileStream.read(columnBytes); - - if (readlen != columnLength) { - throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex); + if (readlen != varcount * 8) { + throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file."); } - /* - String columnString = new String(columnBytes); - //System.out.print(columnString); - String[] values = columnString.split("\n", -1); - if (values.length < casecount) { - throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex); + for (int varindex = 0; varindex < varcount; varindex++) { + 
byte[] offsetBytes = new byte[8]; + System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); + + ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); + byteOffsets[varindex] = offsetByteBuffer.getLong(); + + //System.out.println(byteOffsets[varindex]); } - for (int caseindex = 0; caseindex < casecount; caseindex++) { - reversedMatrix[caseindex][varindex] = values[caseindex]; - }*/ + String [][] reversedMatrix = new String[casecount][varcount]; + + long offset = varcount * 8; + byte[] columnBytes; - int bytecount = 0; - int byteoffset = 0; - int caseindex = 0; - //System.out.println("generating value vector for column "+varindex); - while (bytecount < columnLength) { - if (columnBytes[bytecount] == '\n') { - String token = new String(columnBytes, byteoffset, bytecount-byteoffset); - reversedMatrix[caseindex++][varindex] = token; - byteoffset = bytecount + 1; + for (int varindex = 0; varindex < varcount; varindex++) { + long columnLength = byteOffsets[varindex] - offset; + + + + columnBytes = new byte[(int)columnLength]; + readlen = rotfileStream.read(columnBytes); + + if (readlen != columnLength) { + throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex); + } + /* + String columnString = new String(columnBytes); + //System.out.print(columnString); + String[] values = columnString.split("\n", -1); + + if (values.length < casecount) { + throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex); } - bytecount++; + + for (int caseindex = 0; caseindex < casecount; caseindex++) { + reversedMatrix[caseindex][varindex] = values[caseindex]; + }*/ + + int bytecount = 0; + int byteoffset = 0; + int caseindex = 0; + //System.out.println("generating value vector for column "+varindex); + while (bytecount < columnLength) { + if (columnBytes[bytecount] == '\n') { + String token = new String(columnBytes, byteoffset, bytecount-byteoffset); + reversedMatrix[caseindex++][varindex] = token; + byteoffset = 
bytecount + 1; + } + bytecount++; + } + + if (caseindex != casecount) { + throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex); + } + offset = byteOffsets[varindex]; } - if (caseindex != casecount) { - throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex); - } - offset = byteOffsets[varindex]; - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - for (int varindex = 0; varindex < varcount; varindex++) { - System.out.print(reversedMatrix[caseindex][varindex]); - if (varindex < varcount-1) { - System.out.print("\t"); - } else { - System.out.print("\n"); + for (int caseindex = 0; caseindex < casecount; caseindex++) { + for (int varindex = 0; varindex < varcount; varindex++) { + System.out.print(reversedMatrix[caseindex][varindex]); + if (varindex < varcount-1) { + System.out.print("\t"); + } else { + System.out.print("\n"); + } } } + } - rotfileStream.close(); - } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 7683aab7dfa..f1785a42098 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -25,12 +25,8 @@ import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; +import java.util.*; import java.util.logging.Logger; -import java.util.Base64; -import java.util.HashMap; -import java.util.Map; import javax.imageio.ImageIO; import org.apache.commons.io.IOUtils; import static edu.harvard.iq.dataverse.dataaccess.DataAccess.getStorageIO; @@ -43,6 +39,7 @@ import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.EnumUtils; public class DatasetUtil { @@ -459,7 +456,7 @@ public static List 
getDatasetSummaryFields(DatasetVersion datasetV } public static boolean isRsyncAppropriateStorageDriver(Dataset dataset){ - // ToDo - rsync was written before multiple store support and currently is hardcoded to use the DataAccess.S3 store. + // ToDo - rsync was written before multiple store support and currently is hardcoded to use the DataAccess.S3 store. // When those restrictions are lifted/rsync can be configured per store, this test should check that setting // instead of testing for the 's3" store, //This method is used by both the dataset and edit files page so one change here @@ -551,7 +548,7 @@ public static License getLicense(DatasetVersion dsv) { public static String getLicenseName(DatasetVersion dsv) { License license = DatasetUtil.getLicense(dsv); - return license != null ? license.getName() + return license != null ? getLocalizedLicenseDetails(license,"NAME") : BundleUtil.getStringFromBundle("license.custom"); } @@ -577,7 +574,30 @@ public static String getLicenseIcon(DatasetVersion dsv) { public static String getLicenseDescription(DatasetVersion dsv) { License license = DatasetUtil.getLicense(dsv); - return license != null ? license.getShortDescription() : BundleUtil.getStringFromBundle("license.custom.description"); + return license != null ? getLocalizedLicenseDetails(license,"DESCRIPTION") : BundleUtil.getStringFromBundle("license.custom.description"); + } + + public enum LicenseOption { + NAME, DESCRIPTION + }; + + public static String getLocalizedLicenseDetails(License license,String keyPart) { + String licenseName = license.getName(); + String localizedLicenseValue = "" ; + try { + if (EnumUtils.isValidEnum(LicenseOption.class, keyPart ) ){ + String key = "license." + licenseName.toLowerCase().replace(" ", "_") + "." 
+ keyPart.toLowerCase(); + localizedLicenseValue = BundleUtil.getStringFromPropertyFile(key, "License"); + } + } + catch (Exception e) { + localizedLicenseValue = licenseName; + } + + if (localizedLicenseValue == null) { + localizedLicenseValue = licenseName ; + } + return localizedLicenseValue; } public static String getLocaleExternalStatus(String status) { diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 8e7922fd83b..1d0ec0f19d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -26,20 +26,22 @@ import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.logging.Level; @@ -47,10 +49,10 @@ import javax.ejb.EJBException; import javax.json.Json; import javax.json.JsonArrayBuilder; +import javax.json.JsonNumber; import javax.json.JsonObject; import javax.json.JsonArray; import javax.json.JsonObjectBuilder; -import javax.json.JsonReader; import 
javax.validation.ConstraintViolation; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; @@ -114,10 +116,9 @@ public class AddReplaceFileHelper{ public static String FILE_ADD_OPERATION = "FILE_ADD_OPERATION"; public static String FILE_REPLACE_OPERATION = "FILE_REPLACE_OPERATION"; public static String FILE_REPLACE_FORCE_OPERATION = "FILE_REPLACE_FORCE_OPERATION"; - public static String MULTIPLEFILES_ADD_OPERATION = "MULTIPLEFILES_ADD_OPERATION"; - + private String currentOperation; - + boolean multifile = false; // ----------------------------------- // All the needed EJBs, passed to the constructor // ----------------------------------- @@ -127,8 +128,6 @@ public class AddReplaceFileHelper{ private PermissionServiceBean permissionService; private EjbDataverseEngine commandEngine; private SystemConfig systemConfig; - private LicenseServiceBean licenseServiceBean; - // ----------------------------------- // Instance variables directly added // ----------------------------------- @@ -144,10 +143,6 @@ public class AddReplaceFileHelper{ // -- Optional private DataFile fileToReplace; // step 25 - // ----------------------------------- - // Instance variables derived from other input - // ----------------------------------- - private User user; private DatasetVersion workingVersion; private DatasetVersion clone; List initialFileList; @@ -256,13 +251,12 @@ public void resetFileHelper(){ * @param dvRequest */ public AddReplaceFileHelper(DataverseRequest dvRequest, - IngestServiceBean ingestService, + IngestServiceBean ingestService, DatasetServiceBean datasetService, DataFileServiceBean fileService, PermissionServiceBean permissionService, EjbDataverseEngine commandEngine, - SystemConfig systemConfig, - LicenseServiceBean licenseServiceBean){ + SystemConfig systemConfig){ // --------------------------------- // make sure DataverseRequest isn't null and has a user @@ -304,16 +298,12 @@ public AddReplaceFileHelper(DataverseRequest dvRequest, 
this.permissionService = permissionService; this.commandEngine = commandEngine; this.systemConfig = systemConfig; - this.licenseServiceBean = licenseServiceBean; - - - initErrorHandling(); // Initiate instance vars this.dataset = null; this.dvRequest = dvRequest; - this.user = dvRequest.getUser(); + dvRequest.getUser(); } @@ -336,7 +326,7 @@ public boolean runAddFileByDataset(Dataset chosenDataset, } - public boolean runAddFileByDataset(Dataset chosenDataset, + private boolean runAddFileByDataset(Dataset chosenDataset, String newFileName, String newFileContentType, String newStorageIdentifier, @@ -348,12 +338,8 @@ public boolean runAddFileByDataset(Dataset chosenDataset, initErrorHandling(); - if(multipleFiles) { - this.currentOperation = MULTIPLEFILES_ADD_OPERATION; - } - else { - this.currentOperation = FILE_ADD_OPERATION; - } + multifile=multipleFiles; + this.currentOperation = FILE_ADD_OPERATION; if (!this.step_001_loadDataset(chosenDataset)){ return false; @@ -393,6 +379,11 @@ public boolean runAddFile(Dataset dataset, }*/ + public boolean runForceReplaceFile(long fileToReplaceId, String newFilename, String newFileContentType, + String newStorageIdentifier, InputStream newFileInputStream, Dataset ds, OptionalFileParams optionalFileParams) { + return runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, newFileInputStream, ds, optionalFileParams, false); + } /** * After the constructor, this method is called to replace a file * @@ -403,16 +394,19 @@ public boolean runAddFile(Dataset dataset, * @param newFileInputStream * @return */ - public boolean runForceReplaceFile(Long oldFileId, + private boolean runForceReplaceFile(Long oldFileId, String newFileName, String newFileContentType, String newStorageIdentifier, InputStream newFileInputStream, - OptionalFileParams optionalFileParams){ + Dataset ds, + OptionalFileParams optionalFileParams, + boolean multipleFiles){ msgt(">> runForceReplaceFile"); initErrorHandling(); + 
multifile=multipleFiles; this.currentOperation = FILE_REPLACE_FORCE_OPERATION; @@ -426,22 +420,35 @@ public boolean runForceReplaceFile(Long oldFileId, if (!this.step_005_loadFileToReplaceById(oldFileId)){ return false; } + if(!ds.getId().equals(fileToReplace.getOwner().getId())) { + this.addErrorSevere(getBundleErr("existing_file_to_replace_not_in_dataset")); + return false; + } + // ds may include changes not yet in the copy created when loading the file from the db, as in replaceFiles() + return this.runAddReplaceFile(ds, newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams); + } + + public boolean runReplaceFile(long fileToReplaceId, String newFilename, String newFileContentType, + String newStorageIdentifier, InputStream newFileInputStream, Dataset ds, OptionalFileParams optionalFileParams) { + return runReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, newFileInputStream, ds, optionalFileParams, false); - return this.runAddReplaceFile(fileToReplace.getOwner(), newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams); } - - public boolean runReplaceFile(Long oldFileId, + private boolean runReplaceFile(Long oldFileId, String newFileName, String newFileContentType, String newStorageIdentifier, InputStream newFileInputStream, - OptionalFileParams optionalFileParams){ + Dataset ds, + OptionalFileParams optionalFileParams, + boolean multipleFiles){ msgt(">> runReplaceFile"); initErrorHandling(); + multifile=multipleFiles; this.currentOperation = FILE_REPLACE_OPERATION; if (oldFileId==null){ @@ -455,7 +462,13 @@ public boolean runReplaceFile(Long oldFileId, if (!this.step_005_loadFileToReplaceById(oldFileId)){ return false; } - return this.runAddReplaceFile(fileToReplace.getOwner(), newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams); + + if(!ds.getId().equals(fileToReplace.getOwner().getId())) { + 
this.addErrorSevere(getBundleErr("existing_file_to_replace_not_in_dataset")); + return false; + } + // ds may include changes not yet in the copy created when loading the file from the db, as in replaceFiles() + return this.runAddReplaceFile(ds, newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams); } @@ -759,19 +772,15 @@ private boolean runAddReplacePhase2(boolean tabIngest){ return false; } - - if (this.isFileReplaceOperation()){ + if (this.isFileReplaceOperation()) { msgt("step_080_run_update_dataset_command_for_replace"); - if (!this.step_080_run_update_dataset_command_for_replace()){ - return false; + if (!this.step_080_run_update_dataset_command_for_replace()) { + return false; } - - }else{ + } else if (!multifile) { msgt("step_070_run_update_dataset_command"); - if (!this.isMultipleFilesAddOperation()) { - if (!this.step_070_run_update_dataset_command()) { - return false; - } + if (!this.step_070_run_update_dataset_command()) { + return false; } } @@ -834,16 +843,6 @@ public boolean isFileAddOperation(){ return this.currentOperation.equals(FILE_ADD_OPERATION); } - /** - * Is this a multiple files add operation ? 
- * @return - */ - - public boolean isMultipleFilesAddOperation(){ - - return this.currentOperation.equals(MULTIPLEFILES_ADD_OPERATION); - } - /** * Initialize error handling vars */ @@ -1200,8 +1199,11 @@ private boolean step_030_createNewFilesViaIngest(){ } // Load the working version of the Dataset - workingVersion = dataset.getEditVersion(); - clone = workingVersion.cloneDatasetVersion(); + workingVersion = dataset.getOrCreateEditVersion(); + if(!multifile) { + //Don't repeatedly update the clone (losing changes) in multifile case + clone = workingVersion.cloneDatasetVersion(); + } try { CreateDataFileResult result = FileUtil.createDataFiles(workingVersion, this.newFileInputStream, @@ -1292,9 +1294,6 @@ private boolean step_040_auto_checkForDuplicates(){ // Initialize new file list this.finalFileList = new ArrayList<>(); - String warningMessage = null; - - if (isFileReplaceOperation() && this.fileToReplace == null){ // This error shouldn't happen if steps called correctly this.addErrorSevere(getBundleErr("existing_file_to_replace_is_null") + " (This error shouldn't happen if steps called in sequence....checkForFileReplaceDuplicate)"); @@ -1511,10 +1510,7 @@ private boolean step_050_checkForConstraintViolations(){ return true; } - // ----------------------------------------------------------- - // violations found: gather all error messages - // ----------------------------------------------------------- - List errMsgs = new ArrayList<>(); + new ArrayList<>(); for (ConstraintViolation violation : constraintViolations) { /* for 8859 return conflict response status if the validation fails @@ -1566,7 +1562,7 @@ private boolean step_055_loadOptionalFileParams(OptionalFileParams optionalFileP } } catch (DataFileTagException ex) { - Logger.getLogger(AddReplaceFileHelper.class.getName()).log(Level.SEVERE, null, ex); + logger.log(Level.SEVERE, null, ex); addError(ex.getMessage()); return false; } catch (CommandException ex) { @@ -1605,70 +1601,81 @@ private boolean 
step_060_addFilesViaIngestService(boolean tabIngest){ return true; } + List filesToDelete = new ArrayList(); + Map deleteFileStorageLocations = new HashMap<>(); /** * Create and run the update dataset command * * @return */ - private boolean step_070_run_update_dataset_command(){ - - if (this.hasError()){ + private boolean step_070_run_update_dataset_command() { + //Note -only single file operations and multifile replace call this, multifile add does not + if (this.hasError()) { return false; } - Command update_cmd; + Command update_cmd = null; String deleteStorageLocation = null; - long deleteFileId=-1; - if(isFileReplaceOperation()) { - List filesToDelete = new ArrayList(); + long deleteFileId = -1; + if (isFileReplaceOperation()) { + if (!multifile) { + filesToDelete.clear(); + deleteFileStorageLocations.clear(); + } filesToDelete.add(fileToReplace.getFileMetadata()); - - if(!fileToReplace.isReleased()) { - //If file is only in draft version, also need to delete the physical file - deleteStorageLocation = fileService.getPhysicalFileToDelete(fileToReplace); - deleteFileId=fileToReplace.getId(); + + if (!fileToReplace.isReleased()) { + // If file is only in draft version, also need to delete the physical file + deleteStorageLocation = fileService.getPhysicalFileToDelete(fileToReplace); + deleteFileId = fileToReplace.getId(); + deleteFileStorageLocations.put(deleteFileId, deleteStorageLocation); + } + if (!multifile) { + // Adding the file to the delete list for the command will delete this + // filemetadata and, if the file hasn't been released, the datafile itself. + update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); } - //Adding the file to the delete list for the command will delete this filemetadata and, if the file hasn't been released, the datafile itself. 
- update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); } else { - update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); + update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); } - ((UpdateDatasetVersionCommand) update_cmd).setValidateLenient(true); - - try { - // Submit the update dataset command - // and update the local dataset object - // - dataset = commandEngine.submit(update_cmd); - } catch (CommandException ex) { - /** - * @todo Add a test to exercise this error. - */ - this.addErrorSevere(getBundleErr("add.add_file_error")); - logger.severe(ex.getMessage()); - return false; - }catch (EJBException ex) { - /** - * @todo Add a test to exercise this error. - */ - this.addErrorSevere("add.add_file_error (see logs)"); - logger.severe(ex.getMessage()); - return false; + if (!multifile) { + //Avoid NPE in multifile replace case + ((UpdateDatasetVersionCommand) update_cmd).setValidateLenient(true); } - //Sanity check - if(isFileReplaceOperation()) { - if (deleteStorageLocation != null) { - // Finalize the delete of the physical file - // (File service will double-check that the datafile no - // longer exists in the database, before proceeding to - // delete the physical file) - try { - fileService.finalizeFileDelete(deleteFileId, deleteStorageLocation); - } catch (IOException ioex) { - logger.warning("Failed to delete the physical file associated with the deleted datafile id=" - + deleteFileId + ", storage location: " + deleteStorageLocation); - } + if (!multifile) { + try { + // Submit the update dataset command + // and update the local dataset object + // + dataset = commandEngine.submit(update_cmd); + } catch (CommandException ex) { + /** + * @todo Add a test to exercise this error. + */ + this.addErrorSevere(getBundleErr("add.add_file_error")); + logger.severe(ex.getMessage()); + return false; + } catch (EJBException ex) { + /** + * @todo Add a test to exercise this error. 
+ */ + this.addErrorSevere("add.add_file_error (see logs)"); + logger.severe(ex.getMessage()); + return false; + } + } + + if (isFileReplaceOperation() && deleteFileId!=-1 && !multifile) { + // Finalize the delete of the physical file + // (File service will double-check that the datafile no + // longer exists in the database, before proceeding to + // delete the physical file) + try { + fileService.finalizeFileDelete(deleteFileId, deleteStorageLocation); + } catch (IOException ioex) { + logger.warning("Failed to delete the physical file associated with the deleted datafile id=" + + deleteFileId + ", storage location: " + deleteStorageLocation); } } return true; @@ -1766,7 +1773,7 @@ private boolean step_080_run_update_dataset_command_for_replace(){ } /* - * Go through the final file list, settting the rootFileId and previousFileId + * Go through the final file list, setting the rootFileId and previousFileId */ for (DataFile df : finalFileList) { df.setPreviousDataFileId(fileToReplace.getId()); @@ -1775,7 +1782,7 @@ private boolean step_080_run_update_dataset_command_for_replace(){ } } - // Call the update dataset command which will delete the replaced filemetadata and file in needed (if file is not released) + // Call the update dataset command which will delete the replaced filemetadata and file if needed (if file is not released) // return step_070_run_update_dataset_command(); @@ -1805,7 +1812,7 @@ private void setNewlyAddedFiles(List datafiles){ newlyAddedFileMetadatas = new ArrayList<>(); // Loop of uglinesss...but expect 1 to 4 files in final file list - List latestFileMetadatas = dataset.getEditVersion().getFileMetadatas(); + List latestFileMetadatas = dataset.getOrCreateEditVersion().getFileMetadatas(); for (DataFile newlyAddedFile : finalFileList){ @@ -1927,7 +1934,7 @@ private boolean step_100_startIngestJobs(){ //return true; //} - if (!this.isMultipleFilesAddOperation()) { + if (!multifile) { msg("pre ingest start"); // start the ingest! 
ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); @@ -2021,6 +2028,13 @@ public void setDuplicateFileWarning(String duplicateFileWarning) { this.duplicateFileWarning = duplicateFileWarning; } + /** Add multiple pre-positioned files listed in the jsonData. Works with direct upload, Globus, and other out-of-band methods. + * + * @param jsonData - an array of jsonData entries (one per file) using the single add file jsonData format + * @param dataset + * @param authUser + * @return + */ public Response addFiles(String jsonData, Dataset dataset, User authUser) { msgt("(addFilesToDataset) jsonData: " + jsonData.toString()); @@ -2033,15 +2047,14 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { // ----------------------------------------------------------- // Read jsonData and Parse files information from jsondata : // ----------------------------------------------------------- - try (StringReader rdr = new StringReader(jsonData)) { - JsonReader dbJsonReader = Json.createReader(rdr); - filesJson = dbJsonReader.readArray(); - dbJsonReader.close(); + try { + filesJson = JsonUtil.getJsonArray(jsonData); if (filesJson != null) { totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); - + workingVersion = dataset.getOrCreateEditVersion(); + clone = workingVersion.cloneDatasetVersion(); for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { OptionalFileParams optionalFileParams = null; @@ -2065,10 +2078,9 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } msgt("ADD! 
= " + newFilename); - if (!hasError()) { - runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, - null, optionalFileParams, true); - } + + runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, null, + optionalFileParams, true); if (hasError()) { JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("storageIdentifier", newStorageIdentifier) @@ -2103,7 +2115,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } } catch (DataFileTagException ex) { - Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + logger.log(Level.SEVERE, null, ex); JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) .add("message", ex.getMessage()) @@ -2112,7 +2124,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } catch (NoFilesException ex) { - Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + logger.log(Level.SEVERE, null, ex); JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) .add("message", BundleUtil.getStringFromBundle("NoFileException! Serious Error! 
See administrator!")) @@ -2131,7 +2143,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } try { - Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); + Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); commandEngine.submit(cmd); } catch (CommandException ex) { @@ -2140,9 +2152,6 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { dataset = datasetService.find(dataset.getId()); - List s = dataset.getFiles(); - for (DataFile dataFile : s) { - } //ingest job ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); @@ -2166,6 +2175,174 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { .add("status", STATUS_OK) .add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build(); } + + /** + * Replace multiple files with prepositioned replacements as listed in the + * jsonData. Works with direct upload, Globus, and other out-of-band methods. 
+ * + * @param jsonData - must include fileToReplaceId key with file ID and may include forceReplace key with true/false(default) + * @param dataset + * @param authUser + * @return + */ + + public Response replaceFiles(String jsonData, Dataset ds, User authUser) { + msgt("(replaceFilesInDataset) jsonData: " + jsonData.toString()); + + this.dataset = ds; + JsonArrayBuilder jarr = Json.createArrayBuilder(); + + JsonArray filesJson = null; + + int totalNumberofFiles = 0; + int successNumberofFiles = 0; + // ----------------------------------------------------------- + // Read jsonData and Parse files information from jsondata : + // ----------------------------------------------------------- + try { + filesJson = JsonUtil.getJsonArray(jsonData); + + + if (filesJson != null) { + totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); + workingVersion = dataset.getOrCreateEditVersion(); + clone = workingVersion.cloneDatasetVersion(); + for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { + boolean forceReplace = false; + // (2a) Check for optional "forceReplace" + if ((fileJson.containsKey("forceReplace"))) { + forceReplace = fileJson.getBoolean("forceReplace", false); + } + long fileToReplaceId = -1; + JsonNumber ftri = fileJson.getJsonNumber("fileToReplaceId"); + if(ftri !=null) { + fileToReplaceId = ftri.longValueExact(); + } + + OptionalFileParams optionalFileParams = null; + try { + // (2b) Load up optional params via JSON + // - Will skip extra attributes which includes fileToReplaceId and forceReplace + optionalFileParams = new OptionalFileParams(fileJson.toString()); + + String newFilename = null; + String newFileContentType = null; + String newStorageIdentifier = null; + if ((fileToReplaceId !=-1) && optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + newStorageIdentifier = DataAccess.expandStorageIdentifierIfNeeded(newStorageIdentifier); + 
if(!DataAccess.uploadToDatasetAllowed(dataset, newStorageIdentifier)) { + addErrorSevere("Dataset store configuration does not allow provided storageIdentifier."); + } + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + + msgt("REPLACE! = " + newFilename); + if (forceReplace) { + runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, null, dataset, optionalFileParams, true); + } else { + runReplaceFile(fileToReplaceId, newFilename, newFileContentType, newStorageIdentifier, + null, dataset, optionalFileParams, true); + } + if (hasError()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("errorMessage", getHttpErrorCode().toString() +":"+ getErrorMessagesAsString("\n")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } else { + JsonObject successresult = getSuccessResultAsJsonObjectBuilder().build(); + String duplicateWarning = getDuplicateFileWarning(); + + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("warningMessage", getDuplicateFileWarning()) + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("successMessage", "Replaced successfully in the dataset") + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } + successNumberofFiles = successNumberofFiles + 1; + } + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorMessage", "You must provide a fileToReplaceId, storageidentifier, filename, and mimetype.") + 
.add("fileDetails", fileJson); + + jarr.add(fileoutput); + } + + } catch (DataFileTagException ex) { + logger.log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", ex.getMessage()) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + + } + catch (NoFilesException ex) { + logger.log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", BundleUtil.getStringFromBundle("NoFileException! Serious Error! See administrator!")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } + }// End of adding files + + DatasetLock eipLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress); + if (eipLock == null) { + logger.warning("Dataset not locked for EditInProgress "); + } else { + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + logger.info("Removed EditInProgress lock "); + } + + try { + Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + commandEngine.submit(cmd); + } catch (CommandException ex) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "CommandException updating DatasetVersion from addFiles job: " + ex.getMessage()); + } + + fileService.finalizeFileDeletes(deleteFileStorageLocations); + + dataset = datasetService.find(dataset.getId()); + + //ingest job + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + + } + } + catch ( javax.json.stream.JsonParsingException ex) { + ex.printStackTrace(); + return error(BAD_REQUEST, "Json Parsing Exception :" + ex.getMessage()); + } + catch (Exception e) { + e.printStackTrace(); + return error(BAD_REQUEST, e.getMessage()); + } + + JsonObjectBuilder result = Json.createObjectBuilder() + .add("Total number of files", 
totalNumberofFiles) + .add("Number of files successfully replaced", successNumberofFiles); + + return Response.ok().entity(Json.createObjectBuilder() + .add("status", STATUS_OK) + .add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build(); + } protected static Response error(Response.Status sts, String msg ) { return Response.status(sts) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java index 534e07feaae..1efaf14c755 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java @@ -81,7 +81,7 @@ protected void additionalParameterTests(CommandContext ctxt) throws CommandExcep @Override protected DatasetVersion getVersionToPersist( Dataset theDataset ) { - return theDataset.getEditVersion(); + return theDataset.getOrCreateEditVersion(); } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index 772b6205b02..ca5bf1d3f2c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -56,7 +56,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { DatasetVersion updateVersion = getDataset().getLatestVersionForCopy(); // Copy metadata from draft version to latest published version - updateVersion.setDatasetFields(getDataset().getEditVersion().initDatasetFields()); + updateVersion.setDatasetFields(getDataset().getOrCreateEditVersion().initDatasetFields()); validateOrDie(updateVersion, isValidateLenient()); @@ 
-68,14 +68,14 @@ public Dataset execute(CommandContext ctxt) throws CommandException { TermsOfUseAndAccess oldTerms = updateVersion.getTermsOfUseAndAccess(); - TermsOfUseAndAccess newTerms = getDataset().getEditVersion().getTermsOfUseAndAccess(); + TermsOfUseAndAccess newTerms = getDataset().getOrCreateEditVersion().getTermsOfUseAndAccess(); newTerms.setDatasetVersion(updateVersion); updateVersion.setTermsOfUseAndAccess(newTerms); //Put old terms on version that will be deleted.... - getDataset().getEditVersion().setTermsOfUseAndAccess(oldTerms); + getDataset().getOrCreateEditVersion().setTermsOfUseAndAccess(oldTerms); //Also set the fileaccessrequest boolean on the dataset to match the new terms getDataset().setFileAccessRequest(updateVersion.getTermsOfUseAndAccess().isFileAccessRequest()); - List newComments = getDataset().getEditVersion().getWorkflowComments(); + List newComments = getDataset().getOrCreateEditVersion().getWorkflowComments(); if (newComments!=null && newComments.size() >0) { for(WorkflowComment wfc: newComments) { wfc.setDatasetVersion(updateVersion); @@ -91,7 +91,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // Look for file metadata changes and update published metadata if needed List pubFmds = updateVersion.getFileMetadatas(); int pubFileCount = pubFmds.size(); - int newFileCount = tempDataset.getEditVersion().getFileMetadatas().size(); + int newFileCount = tempDataset.getOrCreateEditVersion().getFileMetadatas().size(); /* The policy for this command is that it should only be used when the change is a 'minor update' with no file changes. * Nominally we could call .isMinorUpdate() for that but we're making the same checks as we go through the update here. 
*/ @@ -99,6 +99,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { logger.severe("Draft version of dataset: " + tempDataset.getId() + " has: " + newFileCount + " while last published version has " + pubFileCount); throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasetversion.update.failure"), this); } + Long thumbId = null; + if(tempDataset.getThumbnailFile()!=null) { + thumbId = tempDataset.getThumbnailFile().getId(); + }; for (FileMetadata publishedFmd : pubFmds) { DataFile dataFile = publishedFmd.getDataFile(); FileMetadata draftFmd = dataFile.getLatestFileMetadata(); @@ -131,11 +135,15 @@ public Dataset execute(CommandContext ctxt) throws CommandException { ctxt.em().remove(mergedFmd); // including removing metadata from the list on the datafile draftFmd.getDataFile().getFileMetadatas().remove(draftFmd); - tempDataset.getEditVersion().getFileMetadatas().remove(draftFmd); + tempDataset.getOrCreateEditVersion().getFileMetadatas().remove(draftFmd); // And any references in the list held by categories for (DataFileCategory cat : tempDataset.getCategories()) { cat.getFileMetadatas().remove(draftFmd); } + //And any thumbnail reference + if(publishedFmd.getDataFile().getId()==thumbId) { + tempDataset.setThumbnailFile(publishedFmd.getDataFile()); + } } // Update modification time on the published version and the dataset diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 89666f02db2..f23033f09fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -305,7 +305,10 @@ public static String createJWTString(Algorithm algorithmRSA, String installation String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); 
logger.fine("Canonical body: " + canonicalBody); String digest = DigestUtils.sha256Hex(canonicalBody); - return JWT.create().withIssuer(BrandingUtil.getInstallationBrandName()).withIssuedAt(Date.from(Instant.now())) + if(installationBrandName==null) { + installationBrandName = BrandingUtil.getInstallationBrandName(); + } + return JWT.create().withIssuer(installationBrandName).withIssuedAt(Date.from(Instant.now())) .withExpiresAt(Date.from(Instant.now().plusSeconds(60 * expirationInMinutes))) .withKeyId("defaultDataverse").withClaim("bodySHA256Hash", digest).sign(algorithmRSA); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftDatasetVersionCommand.java index 88b5a75ea22..7e32b19e576 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftDatasetVersionCommand.java @@ -24,7 +24,7 @@ public GetDraftDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffect @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - return ds.getEditVersion(); + return ds.getOrCreateEditVersion(); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 5d017173685..da2701a41e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -1,16 +1,27 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import com.google.auth.oauth2.ServiceAccountCredentials; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; 
+import com.google.cloud.storage.StorageException; +import com.google.cloud.storage.StorageOptions; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import org.apache.commons.codec.binary.Hex; +import javax.json.Json; +import javax.json.JsonObjectBuilder; +import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.PipedInputStream; @@ -21,17 +32,6 @@ import java.util.Map; import java.util.logging.Logger; -import javax.json.Json; -import javax.json.JsonObjectBuilder; - -import org.apache.commons.codec.binary.Hex; -import com.google.auth.oauth2.ServiceAccountCredentials; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Bucket; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.StorageException; -import com.google.cloud.storage.StorageOptions; - @RequiredPermissions(Permission.PublishDataset) public class GoogleCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { @@ -56,10 +56,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); - try { - FileInputStream fis = new FileInputStream(System.getProperty("dataverse.files.directory") + 
System.getProperty("file.separator") + "googlecloudkey.json"); + String cloudKeyFile = JvmSettings.FILES_DIRECTORY.lookup() + File.separator + "googlecloudkey.json"; + + try (FileInputStream cloudKeyStream = new FileInputStream(cloudKeyFile)) { storage = StorageOptions.newBuilder() - .setCredentials(ServiceAccountCredentials.fromStream(fis)) + .setCredentials(ServiceAccountCredentials.fromStream(cloudKeyStream)) .setProjectId(projectName) .build() .getService(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java index 64beba82450..5f31ea756eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java @@ -12,17 +12,20 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import java.io.File; -import java.util.Properties; -import java.util.logging.Level; -import java.util.logging.Logger; +import edu.harvard.iq.dataverse.settings.JvmSettings; + import javax.batch.operations.JobOperator; import javax.batch.operations.JobSecurityException; import javax.batch.operations.JobStartException; import javax.batch.runtime.BatchRuntime; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; +import java.io.File; +import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @RequiredPermissions(Permission.EditDataset) public class ImportFromFileSystemCommand extends AbstractCommand { @@ -69,18 
+72,20 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { logger.info(error); throw new IllegalCommandException(error, this); } - File directory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier()); - // TODO: - // The above goes directly to the filesystem directory configured by the - // old "dataverse.files.directory" JVM option (otherwise used for temp - // files only, after the Multistore implementation (#6488). - // We probably want package files to be able to use specific stores instead. - // More importantly perhaps, the approach above does not take into account - // if the dataset may have an AlternativePersistentIdentifier, that may be - // designated isStorageLocationDesignator() - i.e., if a different identifer - // needs to be used to name the storage directory, instead of the main/current - // persistent identifier above. + + File directory = new File( + String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier())); + + // TODO: The above goes directly to the filesystem directory configured by the + // old "dataverse.files.directory" JVM option (otherwise used for temp + // files only, after the Multistore implementation (#6488). + // We probably want package files to be able to use specific stores instead. + // More importantly perhaps, the approach above does not take into account + // if the dataset may have an AlternativePersistentIdentifier, that may be + // designated isStorageLocationDesignator() - i.e., if a different identifer + // needs to be used to name the storage directory, instead of the main/current + // persistent identifier above. if (!isValidDirectory(directory)) { String error = "Dataset directory is invalid. 
" + directory; logger.info(error); @@ -93,11 +98,10 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { throw new IllegalCommandException(error, this); } - File uploadDirectory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier() - + File.separator + uploadFolder); - // TODO: - // see the comment above. + File uploadDirectory = new File(String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier(), uploadFolder)); + + // TODO: see the comment above. if (!isValidDirectory(uploadDirectory)) { String error = "Upload folder is not a valid directory."; logger.info(error); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PersistProvFreeFormCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PersistProvFreeFormCommand.java index aa06967675f..a258c36d6ea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PersistProvFreeFormCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PersistProvFreeFormCommand.java @@ -36,7 +36,7 @@ public DataFile execute(CommandContext ctxt) throws CommandException { } else { Dataset dataset = dataFile.getOwner(); - DatasetVersion workingVersion = dataset.getEditVersion(); + DatasetVersion workingVersion = dataset.getOrCreateEditVersion(); if (workingVersion.isDraft()) { if (dataset.isReleased()){ diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommand.java index 16fa40cd8a7..38cbeaf3d66 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommand.java @@ -63,7 +63,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { } else { 
Dataset dataset = file.getOwner(); - DatasetVersion workingVersion = dataset.getEditVersion(); + DatasetVersion workingVersion = dataset.getOrCreateEditVersion(); // We need the FileMetadata for the file in the draft dataset version and the // file we have may still reference the fmd from the prior released version FileMetadata draftFmd = file.getFileMetadata(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java index 169f6d790d3..ba0348f57d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java @@ -37,11 +37,11 @@ public Dataset execute(CommandContext ctxt) throws CommandException { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.reject.datasetNotInReview"), this); } - dataset.getEditVersion().setLastUpdateTime(getTimestamp()); + dataset.getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); dataset.setModificationTime(getTimestamp()); ctxt.engine().submit( new RemoveLockCommand(getRequest(), getDataset(), DatasetLock.Reason.InReview) ); - WorkflowComment workflowComment = new WorkflowComment(dataset.getEditVersion(), WorkflowComment.Type.RETURN_TO_AUTHOR, comment, (AuthenticatedUser) this.getUser()); + WorkflowComment workflowComment = new WorkflowComment(dataset.getOrCreateEditVersion(), WorkflowComment.Type.RETURN_TO_AUTHOR, comment, (AuthenticatedUser) this.getUser()); ctxt.datasets().addWorkflowComment(workflowComment); updateDatasetUser(ctxt); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java index c3a62a35bb3..72f0ef335fb 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java @@ -77,7 +77,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { public Dataset save(CommandContext ctxt) throws CommandException { - getDataset().getEditVersion().setLastUpdateTime(getTimestamp()); + getDataset().getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); getDataset().setModificationTime(getTimestamp()); Dataset savedDataset = ctxt.em().merge(getDataset()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java index e38f5bae8e0..130030798ab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java @@ -51,7 +51,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { private Dataset save(CommandContext ctxt) throws CommandException { - getDataset().getEditVersion().setLastUpdateTime(getTimestamp()); + getDataset().getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); getDataset().setModificationTime(getTimestamp()); Dataset savedDataset = ctxt.em().merge(getDataset()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 227c54c598f..33f64f23076 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -64,7 +64,7 @@ public UpdateDatasetVersionCommand(Dataset theDataset, DataverseRequest aRequest this.filesToDelete = new 
ArrayList<>(); this.clone = null; this.fmVarMet = null; - for (FileMetadata fmd : theDataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmd : theDataset.getOrCreateEditVersion().getFileMetadatas()) { if (fmd.getDataFile().equals(fileToDelete)) { filesToDelete.add(fmd); break; @@ -114,10 +114,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", getDataset().getId()); } - getDataset().getEditVersion(fmVarMet).setDatasetFields(getDataset().getEditVersion(fmVarMet).initDatasetFields()); - validateOrDie(getDataset().getEditVersion(fmVarMet), isValidateLenient()); + getDataset().getOrCreateEditVersion(fmVarMet).setDatasetFields(getDataset().getOrCreateEditVersion(fmVarMet).initDatasetFields()); + validateOrDie(getDataset().getOrCreateEditVersion(fmVarMet), isValidateLenient()); - final DatasetVersion editVersion = getDataset().getEditVersion(fmVarMet); + final DatasetVersion editVersion = getDataset().getOrCreateEditVersion(fmVarMet); DatasetFieldUtil.tidyUpFields(editVersion.getDatasetFields(), true); @@ -204,10 +204,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // If the datasetversion doesn't match, we have the fmd from a published version // and we need to remove the one for the newly created draft instead, so we find // it here - logger.fine("Edit ver: " + theDataset.getEditVersion().getId()); + logger.fine("Edit ver: " + theDataset.getOrCreateEditVersion().getId()); logger.fine("fmd ver: " + fmd.getDatasetVersion().getId()); - if (!theDataset.getEditVersion().equals(fmd.getDatasetVersion())) { - fmd = FileMetadataUtil.getFmdForFileInEditVersion(fmd, theDataset.getEditVersion()); + if (!theDataset.getOrCreateEditVersion().equals(fmd.getDatasetVersion())) { + fmd = FileMetadataUtil.getFmdForFileInEditVersion(fmd, theDataset.getOrCreateEditVersion()); } } fmd = ctxt.em().merge(fmd); @@ -229,21 +229,21 @@ public Dataset 
execute(CommandContext ctxt) throws CommandException { // In either case, to fully remove the fmd, we have to remove any other possible // references // From the datasetversion - FileMetadataUtil.removeFileMetadataFromList(theDataset.getEditVersion().getFileMetadatas(), fmd); + FileMetadataUtil.removeFileMetadataFromList(theDataset.getOrCreateEditVersion().getFileMetadatas(), fmd); // and from the list associated with each category for (DataFileCategory cat : theDataset.getCategories()) { FileMetadataUtil.removeFileMetadataFromList(cat.getFileMetadatas(), fmd); } } - for(FileMetadata fmd: theDataset.getEditVersion().getFileMetadatas()) { + for(FileMetadata fmd: theDataset.getOrCreateEditVersion().getFileMetadatas()) { logger.fine("FMD: " + fmd.getId() + " for file: " + fmd.getDataFile().getId() + "is in final draft version"); } if (recalculateUNF) { - ctxt.ingest().recalculateDatasetVersionUNF(theDataset.getEditVersion()); + ctxt.ingest().recalculateDatasetVersionUNF(theDataset.getOrCreateEditVersion()); } - theDataset.getEditVersion().setLastUpdateTime(getTimestamp()); + theDataset.getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); theDataset.setModificationTime(getTimestamp()); savedDataset = ctxt.em().merge(theDataset); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 4bbcd653ac3..eb7632dd03c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -32,18 +32,15 @@ import edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import static edu.harvard.iq.dataverse.util.SystemConfig.FQDN; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; +import 
edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -1292,7 +1289,7 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da // harvesting *all* files are encoded as otherMats; even tabular ones. private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileDTO fileDTo : fileDtos) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, @@ -1339,7 +1336,7 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, List fileMetadatas) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileMetadata fileMetadata : fileMetadatas) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, @@ -1555,33 +1552,6 @@ private static void saveJsonToDisk(String datasetVersionAsJson) throws IOExcepti Files.write(Paths.get("/tmp/out.json"), datasetVersionAsJson.getBytes()); } - /** - * The "official", designated URL of the site; - * can be defined as a complete URL; or derived from the - * "official" hostname. 
If none of these options is set, - * defaults to the InetAddress.getLocalHOst() and https; - */ - private static String getDataverseSiteUrl() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - hostName = null; - } - } - - if (hostName != null) { - return "https://" + hostName; - } - - return "http://localhost:8080"; - } - @@ -1893,7 +1863,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, DataVariable dv, FileMeta } private static void createFileDscr(XMLStreamWriter xmlw, DatasetVersion datasetVersion) throws XMLStreamException { - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { DataFile dataFile = fileMetadata.getDataFile(); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 49fe203b96d..bea3858a60e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -256,7 +256,10 @@ public static void writeCreatorsElement(XMLStreamWriter xmlw, DatasetVersionDTO creator_map.put("nameType", "Personal"); nameType_check = true; } - + // ToDo - the algorithm to determine if this is a Person or Organization here + // has been abstracted into a separate + // edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here + // to avoid duplication/variants of the algorithm creatorName = Cleanup.normalize(creatorName); // Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 if 
(creatorName.contains(",")) { @@ -706,6 +709,11 @@ public static void writeContributorElement(XMLStreamWriter xmlw, String contribu boolean nameType_check = false; Map contributor_map = new HashMap(); + // ToDo - the algorithm to determine if this is a Person or Organization here + // has been abstracted into a separate + // edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here + // to avoid duplication/variants of the algorithm + contributorName = Cleanup.normalize(contributorName); // Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 if (contributorName.contains(",")) { @@ -717,6 +725,9 @@ public static void writeContributorElement(XMLStreamWriter xmlw, String contribu // givenName ok contributor_map.put("nameType", "Personal"); nameType_check = true; + // re: the above toDo - the ("ContactPerson".equals(contributorType) && + // !isValidEmailAddress(contributorName)) clause in the next line could/should + // be sent as the OrgIfTied boolean parameter } else if (isOrganization || ("ContactPerson".equals(contributorType) && !isValidEmailAddress(contributorName))) { contributor_map.put("nameType", "Organizational"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java index 7f94b1bbbbf..0a238eb5198 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java @@ -20,7 +20,6 @@ import javax.persistence.Id; import javax.persistence.JoinColumn; import javax.persistence.OneToMany; -import javax.persistence.Transient; /** * A specification or definition for how an external tool is intended to @@ -30,8 +29,6 @@ @Entity public class ExternalTool implements Serializable { - private static final Logger logger = Logger.getLogger(ExternalToolServiceBean.class.getCanonicalName()); - public static final String 
DISPLAY_NAME = "displayName"; public static final String DESCRIPTION = "description"; public static final String LEGACY_SINGLE_TYPE = "type"; @@ -41,6 +38,8 @@ public class ExternalTool implements Serializable { public static final String TOOL_PARAMETERS = "toolParameters"; public static final String CONTENT_TYPE = "contentType"; public static final String TOOL_NAME = "toolName"; + public static final String ALLOWED_API_CALLS = "allowedApiCalls"; + public static final String REQUIREMENTS = "requirements"; @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @@ -97,6 +96,23 @@ public class ExternalTool implements Serializable { @Column(nullable = true, columnDefinition = "TEXT") private String contentType; + /** + * Set of API calls the tool would like to be able to use (e.g. for retrieving + * data through the Dataverse REST API). Used to build signedUrls for POST + * headers, as in DP Creator + */ + @Column(nullable = true, columnDefinition = "TEXT") + private String allowedApiCalls; + + /** + * When non-null, the tool has indicated that it has certain requirements + * that must be met before it should be shown to the user. This + * functionality was added for tools that operate on aux files rather than + * data files so "auxFilesExist" is one of the possible values. 
+ */ + @Column(nullable = true, columnDefinition = "TEXT") + private String requirements; + /** * This default constructor is only here to prevent this error at * deployment: @@ -112,6 +128,10 @@ public ExternalTool() { } public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType) { + this(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, null, null); + } + + public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType, String allowedApiCalls, String requirements) { this.displayName = displayName; this.toolName = toolName; this.description = description; @@ -120,6 +140,8 @@ public ExternalTool(String displayName, String toolName, String description, Lis this.toolUrl = toolUrl; this.toolParameters = toolParameters; this.contentType = contentType; + this.allowedApiCalls = allowedApiCalls; + this.requirements = requirements; } public enum Type { @@ -273,6 +295,9 @@ public JsonObjectBuilder toJson() { if (getContentType() != null) { jab.add(CONTENT_TYPE, getContentType()); } + if (getAllowedApiCalls()!= null) { + jab.add(ALLOWED_API_CALLS,getAllowedApiCalls()); + } return jab; } @@ -298,5 +323,26 @@ public String getDisplayNameLang() { return displayName; } + /** + * @return the allowedApiCalls + */ + public String getAllowedApiCalls() { + return allowedApiCalls; + } + + /** + * @param allowedApiCalls the allowedApiCalls to set + */ + public void setAllowedApiCalls(String allowedApiCalls) { + this.allowedApiCalls = allowedApiCalls; + } + + public String getRequirements() { + return requirements; + } + + public void setRequirements(String requirements) { + this.requirements = requirements; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java 
b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 33d8c2d0d54..88a51017b75 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -4,16 +4,35 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.Base64; +import java.util.Map.Entry; +import java.util.logging.Level; +import java.util.logging.Logger; import javax.json.Json; import javax.json.JsonArray; +import javax.json.JsonArrayBuilder; +import javax.json.JsonNumber; import javax.json.JsonObject; -import javax.json.JsonReader; +import javax.json.JsonObjectBuilder; +import javax.json.JsonString; +import javax.json.JsonValue; +import javax.ws.rs.HttpMethod; + +import org.apache.commons.codec.binary.StringUtils; /** * Handles an operation on a specific file. 
Requires a file id in order to be @@ -23,15 +42,26 @@ public class ExternalToolHandler extends URLTokenUtil { private final ExternalTool externalTool; + + private String requestMethod; + + public static final String HTTP_METHOD="httpMethod"; + public static final String TIMEOUT="timeOut"; + public static final String SIGNED_URL="signedUrl"; + public static final String NAME="name"; + public static final String URL_TEMPLATE="urlTemplate"; + + /** * File level tool * * @param externalTool The database entity. - * @param dataFile Required. - * @param apiToken The apiToken can be null because "explore" tools can be - * used anonymously. + * @param dataFile Required. + * @param apiToken The apiToken can be null because "explore" tools can be + * used anonymously. */ - public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToken apiToken, FileMetadata fileMetadata, String localeCode) { + public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToken apiToken, + FileMetadata fileMetadata, String localeCode) { super(dataFile, apiToken, fileMetadata, localeCode); this.externalTool = externalTool; } @@ -40,52 +70,168 @@ public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToke * Dataset level tool * * @param externalTool The database entity. - * @param dataset Required. - * @param apiToken The apiToken can be null because "explore" tools can be - * used anonymously. + * @param dataset Required. + * @param apiToken The apiToken can be null because "explore" tools can be + * used anonymously. */ public ExternalToolHandler(ExternalTool externalTool, Dataset dataset, ApiToken apiToken, String localeCode) { super(dataset, apiToken, localeCode); this.externalTool = externalTool; } - // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters. 
- public String getQueryParametersForUrl() { - return getQueryParametersForUrl(false); + public String handleRequest() { + return handleRequest(false); } - - // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters. - public String getQueryParametersForUrl(boolean preview) { - String toolParameters = externalTool.getToolParameters(); - JsonReader jsonReader = Json.createReader(new StringReader(toolParameters)); - JsonObject obj = jsonReader.readObject(); - JsonArray queryParams = obj.getJsonArray("queryParameters"); - if (queryParams == null || queryParams.isEmpty()) { - return ""; - } - List params = new ArrayList<>(); - queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { - queryParam.keySet().forEach((key) -> { - String value = queryParam.getString(key); - String param = getQueryParam(key, value); - if (param != null && !param.isEmpty()) { - params.add(param); + + public String handleRequest(boolean preview) { + JsonObject toolParameters = JsonUtil.getJsonObject(externalTool.getToolParameters()); + JsonString method = toolParameters.getJsonString(HTTP_METHOD); + requestMethod = method != null ? method.getString() : HttpMethod.GET; + JsonObject params = getParams(toolParameters); + logger.fine("Found params: " + JsonUtil.prettyPrint(params)); + if (requestMethod.equals(HttpMethod.GET)) { + String paramsString = ""; + if (externalTool.getAllowedApiCalls() == null) { + // Legacy, using apiKey + logger.fine("Legacy Case"); + + for (Entry entry : params.entrySet()) { + paramsString = paramsString + (paramsString.isEmpty() ? "?" : "&") + entry.getKey() + "="; + JsonValue val = entry.getValue(); + if (val.getValueType().equals(JsonValue.ValueType.NUMBER)) { + paramsString += ((JsonNumber) val).intValue(); + } else { + paramsString += ((JsonString) val).getString(); + } } - }); - }); - if (!preview) { - return "?" 
+ String.join("&", params); + } else { + //Send a signed callback to get params and signedURLs + String callback = null; + switch (externalTool.getScope()) { + case DATASET: + callback=SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + dataset.getId() + "/versions/:latest/toolparams/" + externalTool.getId(); + break; // without this, DATASET fell through and FILE overwrote callback using dataFile + case FILE: + callback= SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/files/" + dataFile.getId() + + "/metadata/" + fileMetadata.getId() + "/toolparams/" + externalTool.getId(); + break; + } + if (apiToken != null) { + callback = UrlSignerUtil.signUrl(callback, 5, apiToken.getAuthenticatedUser().getUserIdentifier(), HttpMethod.GET, + JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + apiToken.getTokenString()); + } + paramsString= "?callback=" + Base64.getEncoder().encodeToString(StringUtils.getBytesUtf8(callback)); + if (getLocaleCode() != null) { + paramsString += "&locale=" + getLocaleCode(); + } + } + if (preview) { + paramsString += "&preview=true"; + } + logger.fine("GET return is: " + paramsString); + return paramsString; + } else { - return "?" + String.join("&", params) + "&preview=true"; + // ToDo - if the allowedApiCalls() are defined, could/should we send them to + // tools using GET as well? + + if (requestMethod.equals(HttpMethod.POST)) { + String body = JsonUtil.prettyPrint(createPostBody(params).build()); + try { + logger.fine("POST Body: " + body); // the body can contain signed URLs, so keep it out of INFO-level logs + return postFormData(body); + } catch (IOException | InterruptedException ex) { + Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); + } + } } + return null; + } + + public JsonObject getParams(JsonObject toolParameters) { + //ToDo - why an array of objects, each with a single key/value pair, instead of one object? 
+ JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); + + // ToDo return json and print later + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); + if (!(queryParams == null) && !queryParams.isEmpty()) { + queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { + queryParam.keySet().forEach((key) -> { + String value = queryParam.getString(key); + JsonValue param = getParam(value); + if (param != null) { + paramsBuilder.add(key, param); + } + }); + }); + } + return paramsBuilder.build(); + } + + public JsonObjectBuilder createPostBody(JsonObject params) { + JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); + bodyBuilder.add("queryParameters", params); + String apiCallStr = externalTool.getAllowedApiCalls(); + if (apiCallStr != null && !apiCallStr.isBlank()) { + JsonArray apiArray = JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()); + JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); + apiArray.getValuesAs(JsonObject.class).forEach(((apiObj) -> { + logger.fine(JsonUtil.prettyPrint(apiObj)); + String name = apiObj.getJsonString(NAME).getString(); + String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString(); + int timeout = apiObj.getInt(TIMEOUT); + String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString(); + logger.fine("URL Template: " + urlTemplate); + urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; + String apiPath = replaceTokensWithValues(urlTemplate); + logger.fine("URL WithTokens: " + apiPath); + String url = apiPath; + // Sign if apiToken exists, otherwise send unsigned URL (i.e. 
for guest users) + ApiToken apiToken = getApiToken(); + if (apiToken != null) { + url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), + httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + + getApiToken().getTokenString()); + } + logger.fine("Signed URL: " + url); + apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod) + .add(SIGNED_URL, url).add(TIMEOUT, timeout)); + })); + bodyBuilder.add("signedUrls", apisBuilder); + } + return bodyBuilder; + } + + private String postFormData(String allowedApis) throws IOException, InterruptedException { + String url = null; + HttpClient client = HttpClient.newHttpClient(); + HttpRequest request = HttpRequest.newBuilder().POST(HttpRequest.BodyPublishers.ofString(allowedApis)) + .uri(URI.create(externalTool.getToolUrl())).header("Content-Type", "application/json").build(); + HttpResponse response = client.send(request, HttpResponse.BodyHandlers.ofString()); + boolean redirect = false; + int status = response.statusCode(); + if (status != HttpURLConnection.HTTP_OK) { + if (status == HttpURLConnection.HTTP_MOVED_TEMP || status == HttpURLConnection.HTTP_MOVED_PERM + || status == HttpURLConnection.HTTP_SEE_OTHER) { + redirect = true; + } + } + if (redirect == true) { + String newUrl = response.headers().firstValue("location").get(); +// toolContext = "http://" + response.uri().getAuthority(); + + url = newUrl; + } + return url; } public String getToolUrlWithQueryParams() { - return externalTool.getToolUrl() + getQueryParametersForUrl(); + String params = ExternalToolHandler.this.handleRequest(); + return externalTool.getToolUrl() + params; } - + public String getToolUrlForPreviewMode() { - return externalTool.getToolUrl() + getQueryParametersForUrl(true); + return externalTool.getToolUrl() + handleRequest(true); } public ExternalTool getExternalTool() { @@ -97,9 +243,9 @@ public void setApiToken(ApiToken apiToken) { } /** - * @return 
Returns Javascript that opens the explore tool in a new browser - * tab if the browser allows it.If not, it shows an alert that popups must - * be enabled in the browser. + * @return Returns Javascript that opens the explore tool in a new browser tab + * if the browser allows it.If not, it shows an alert that popups must + * be enabled in the browser. */ public String getExploreScript() { String toolUrl = this.getToolUrlWithQueryParams(); diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java index d49d66c26f7..f38cd7301ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java @@ -1,11 +1,14 @@ package edu.harvard.iq.dataverse.externaltools; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.externaltools.ExternalTool.Type; import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.URLTokenUtil.ReservedWord; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.externaltools.ExternalTool.Scope; import java.io.StringReader; @@ -29,6 +32,8 @@ import static edu.harvard.iq.dataverse.externaltools.ExternalTool.*; import java.util.stream.Collectors; import java.util.stream.Stream; +import javax.ejb.EJB; +import javax.json.JsonValue; @Stateless @Named @@ -39,6 +44,9 @@ public class ExternalToolServiceBean { @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; + @EJB + AuxiliaryFileServiceBean auxiliaryFileService; + public List findAll() { TypedQuery typedQuery = em.createQuery("SELECT OBJECT(o) FROM 
ExternalTool AS o ORDER BY o.id", ExternalTool.class); return typedQuery.getResultList(); @@ -132,13 +140,13 @@ public ExternalTool save(ExternalTool externalTool) { * file supports The list of tools is passed in so it doesn't hit the * database each time */ - public static List findExternalToolsByFile(List allExternalTools, DataFile file) { + public List findExternalToolsByFile(List allExternalTools, DataFile file) { List externalTools = new ArrayList<>(); //Map tabular data to it's mimetype (the isTabularData() check assures that this code works the same as before, but it may need to change if tabular data is split into subtypes with differing mimetypes) final String contentType = file.isTabularData() ? DataFileServiceBean.MIME_TYPE_TSV_ALT : file.getContentType(); allExternalTools.forEach((externalTool) -> { - //Match tool and file type - if (contentType.equals(externalTool.getContentType())) { + //Match tool and file type, then check requirements + if (contentType.equals(externalTool.getContentType()) && meetsRequirements(externalTool, file)) { externalTools.add(externalTool); } }); @@ -146,13 +154,37 @@ public static List findExternalToolsByFile(List allE return externalTools; } + public boolean meetsRequirements(ExternalTool externalTool, DataFile dataFile) { + String requirements = externalTool.getRequirements(); + if (requirements == null) { + logger.fine("Data file id" + dataFile.getId() + ": no requirements for tool id " + externalTool.getId()); + return true; + } + boolean meetsRequirements = true; + JsonObject requirementsObj = JsonUtil.getJsonObject(requirements); + JsonArray auxFilesExist = requirementsObj.getJsonArray("auxFilesExist"); + for (JsonValue jsonValue : auxFilesExist) { + String formatTag = jsonValue.asJsonObject().getString("formatTag"); + String formatVersion = jsonValue.asJsonObject().getString("formatVersion"); + AuxiliaryFile auxFile = auxiliaryFileService.lookupAuxiliaryFile(dataFile, formatTag, formatVersion); + if (auxFile == null) 
{ + logger.fine("Data file id" + dataFile.getId() + ": cannot find required aux file. formatTag=" + formatTag + ". formatVersion=" + formatVersion); + meetsRequirements = false; + break; + } else { + logger.fine("Data file id" + dataFile.getId() + ": found required aux file. formatTag=" + formatTag + ". formatVersion=" + formatVersion); + meetsRequirements = true; + } + } + return meetsRequirements; + } + public static ExternalTool parseAddExternalToolManifest(String manifest) { if (manifest == null || manifest.isEmpty()) { throw new IllegalArgumentException("External tool manifest was null or empty!"); } - JsonReader jsonReader = Json.createReader(new StringReader(manifest)); - JsonObject jsonObject = jsonReader.readObject(); + JsonObject jsonObject = JsonUtil.getJsonObject(manifest); //Note: ExternalToolServiceBeanTest tests are dependent on the order of these retrievals String displayName = getRequiredTopLevelField(jsonObject, DISPLAY_NAME); String toolName = getOptionalTopLevelField(jsonObject, TOOL_NAME); @@ -169,6 +201,9 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { String toolUrl = getRequiredTopLevelField(jsonObject, TOOL_URL); JsonObject toolParametersObj = jsonObject.getJsonObject(TOOL_PARAMETERS); JsonArray queryParams = toolParametersObj.getJsonArray("queryParameters"); + JsonArray allowedApiCallsArray = jsonObject.getJsonArray(ALLOWED_API_CALLS); + JsonObject requirementsObj = jsonObject.getJsonObject(REQUIREMENTS); + boolean allRequiredReservedWordsFound = false; if (scope.equals(Scope.FILE)) { List requiredReservedWordCandidates = new ArrayList<>(); @@ -221,8 +256,16 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { } String toolParameters = toolParametersObj.toString(); + String allowedApiCalls = null; + if(allowedApiCallsArray !=null) { + allowedApiCalls = allowedApiCallsArray.toString(); + } + String requirements = null; + if (requirementsObj != null) { + requirements = 
requirementsObj.toString(); + } - return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType); + return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, allowedApiCalls, requirements); } private static String getRequiredTopLevelField(JsonObject jsonObject, String key) { diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/ClientHarvestRun.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/ClientHarvestRun.java index 0dc94f835e9..50d06807a13 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/ClientHarvestRun.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/ClientHarvestRun.java @@ -40,12 +40,13 @@ public void setId(Long id) { this.id = id; } - public enum RunResultType { SUCCESS, FAILURE, INPROGRESS }; + public enum RunResultType { SUCCESS, FAILURE, INPROGRESS, INTERRUPTED }; private static String RESULT_LABEL_SUCCESS = "SUCCESS"; private static String RESULT_LABEL_FAILURE = "FAILED"; private static String RESULT_LABEL_INPROGRESS = "IN PROGRESS"; private static String RESULT_DELETE_IN_PROGRESS = "DELETE IN PROGRESS"; + private static String RESULT_LABEL_INTERRUPTED = "INTERRUPTED"; @ManyToOne @JoinColumn(nullable = false) @@ -76,6 +77,8 @@ public String getResultLabel() { return RESULT_LABEL_FAILURE; } else if (isInProgress()) { return RESULT_LABEL_INPROGRESS; + } else if (isInterrupted()) { + return RESULT_LABEL_INTERRUPTED; } return null; } @@ -84,8 +87,8 @@ public String getDetailedResultLabel() { if (harvestingClient != null && harvestingClient.isDeleteInProgress()) { return RESULT_DELETE_IN_PROGRESS; } - if (isSuccess()) { - String resultLabel = RESULT_LABEL_SUCCESS; + if (isSuccess() || isInterrupted()) { + String resultLabel = getResultLabel(); resultLabel = resultLabel.concat("; "+harvestedDatasetCount+" harvested, "); resultLabel = resultLabel.concat(deletedDatasetCount+" 
deleted, "); @@ -128,6 +131,14 @@ public void setInProgress() { harvestResult = RunResultType.INPROGRESS; } + public boolean isInterrupted() { + return RunResultType.INTERRUPTED == harvestResult; + } + + public void setInterrupted() { + harvestResult = RunResultType.INTERRUPTED; + } + // Time of this harvest attempt: @Temporal(value = TemporalType.TIMESTAMP) private Date startTime; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java index 5b3e4df331d..402d0d8ef91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java @@ -19,8 +19,8 @@ */ package edu.harvard.iq.dataverse.harvest.client; +import edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler; import java.io.IOException; -import java.io.FileNotFoundException; import java.io.InputStream; import java.io.StringReader; @@ -31,9 +31,14 @@ import java.io.FileOutputStream; import java.io.PrintWriter; -import java.net.HttpURLConnection; +import static java.net.HttpURLConnection.HTTP_OK; import java.net.MalformedURLException; -import java.net.URL; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.Map; +import java.util.Optional; import java.util.zip.GZIPInputStream; import java.util.zip.InflaterInputStream; @@ -84,17 +89,18 @@ public class FastGetRecord { /** * Client-side GetRecord verb constructor * - * @param baseURL the baseURL of the server to be queried + * @param oaiHandler the configured OaiHande running this harvest + * @param identifier Record identifier + * @param httpClient jdk HttpClient used to make http requests * @exception MalformedURLException the baseURL is bad * @exception SAXException the xml response is bad * @exception IOException an I/O error occurred + * @exception TransformerException 
if it fails to parse the service portion of the record */ - public FastGetRecord(String baseURL, String identifier, String metadataPrefix) - throws IOException, ParserConfigurationException, SAXException, + public FastGetRecord(OaiHandler oaiHandler, String identifier, HttpClient httpClient) throws IOException, ParserConfigurationException, SAXException, TransformerException { - harvestRecord (baseURL, identifier, metadataPrefix); - + harvestRecord (oaiHandler.getBaseOaiUrl(), identifier, oaiHandler.getMetadataPrefix(), oaiHandler.getCustomHeaders(), httpClient); } private String errorMessage = null; @@ -117,57 +123,63 @@ public boolean isDeleted () { } - public void harvestRecord(String baseURL, String identifier, String metadataPrefix) throws IOException, - ParserConfigurationException, SAXException, TransformerException { + public void harvestRecord(String baseURL, String identifier, String metadataPrefix, Map customHeaders, HttpClient httpClient) throws IOException, + ParserConfigurationException, SAXException, TransformerException{ xmlInputFactory = javax.xml.stream.XMLInputFactory.newInstance(); - String requestURL = getRequestURL(baseURL, identifier, metadataPrefix); + InputStream in; + + // This was one other place where the Harvester code was still using + // the obsolete java.net.ttpUrlConnection that didn't get replaced with + // the new java.net.http.HttpClient during the first pas of the XOAI + // rewrite. (L.A.) 
- InputStream in = null; - URL url = new URL(requestURL); - HttpURLConnection con = null; - int responseCode = 0; - - con = (HttpURLConnection) url.openConnection(); - con.setRequestProperty("User-Agent", "DataverseHarvester/3.0"); - con.setRequestProperty("Accept-Encoding", - "compress, gzip, identify"); - try { - responseCode = con.getResponseCode(); - //logger.debug("responseCode=" + responseCode); - } catch (FileNotFoundException e) { - //logger.info(requestURL, e); - responseCode = HttpURLConnection.HTTP_UNAVAILABLE; - } - - // TODO: -- L.A. - // - // support for cookies; - // support for limited retry attempts -- ? - // implement reading of the stream as filterinputstream -- ? - // -- that could make it a little faster still. -- L.A. - - - - if (responseCode == 200) { - - String contentEncoding = con.getHeaderField("Content-Encoding"); - //logger.debug("contentEncoding=" + contentEncoding); - - // support for the standard compress/gzip/deflate compression - // schemes: - if ("compress".equals(contentEncoding)) { - ZipInputStream zis = new ZipInputStream(con.getInputStream()); - zis.getNextEntry(); - in = zis; - } else if ("gzip".equals(contentEncoding)) { - in = new GZIPInputStream(con.getInputStream()); - } else if ("deflate".equals(contentEncoding)) { - in = new InflaterInputStream(con.getInputStream()); - } else { - in = con.getInputStream(); + if (httpClient == null) { + throw new IOException("Null Http Client, cannot make a GetRecord call to obtain the metadata."); + } + + HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() + .uri(URI.create(requestURL)) + .GET() + .header("User-Agent", "XOAI Service Provider v5 (Dataverse)") + .header("Accept-Encoding", "compress, gzip"); + + if (customHeaders != null) { + for (String headerName : customHeaders.keySet()) { + requestBuilder.header(headerName, customHeaders.get(headerName)); + } + } + + HttpRequest request = requestBuilder.build(); + HttpResponse response; + + try { + response = 
httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IOException("Failed to connect to the remote dataverse server to obtain GetRecord metadata"); + } + + int responseCode = response.statusCode(); + + if (responseCode == HTTP_OK) { + InputStream inputStream = response.body(); + Optional contentEncoding = response.headers().firstValue("Content-Encoding"); + + // support for the standard gzip encoding: + in = inputStream; + if (contentEncoding.isPresent()) { + if (contentEncoding.get().equals("compress")) { + ZipInputStream zis = new ZipInputStream(inputStream); + zis.getNextEntry(); + in = zis; + } else if (contentEncoding.get().equals("gzip")) { + in = new GZIPInputStream(inputStream); + } else if (contentEncoding.get().equals("deflate")) { + in = new InflaterInputStream(inputStream); + } } // We are going to read the OAI header and SAX-parse it for the @@ -185,9 +197,7 @@ public void harvestRecord(String baseURL, String identifier, String metadataPref FileOutputStream tempFileStream = null; PrintWriter metadataOut = null; - savedMetadataFile = File.createTempFile("meta", ".tmp"); - - + savedMetadataFile = File.createTempFile("meta", ".tmp"); int mopen = 0; int mclose = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index e7156dfe9aa..40bd45ecb30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -48,6 +48,9 @@ import java.net.http.HttpClient; import java.net.http.HttpRequest; import java.net.http.HttpResponse; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.Path; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; @@ -85,6 +88,7 @@ public 
class HarvesterServiceBean { public static final String HARVEST_RESULT_FAILED="failed"; public static final String DATAVERSE_PROPRIETARY_METADATA_FORMAT="dataverse_json"; public static final String DATAVERSE_PROPRIETARY_METADATA_API="/api/datasets/export?exporter="+DATAVERSE_PROPRIETARY_METADATA_FORMAT+"&persistentId="; + public static final String DATAVERSE_HARVEST_STOP_FILE="../logs/stopharvest_"; public HarvesterServiceBean() { @@ -130,7 +134,7 @@ public List getHarvestTimers() { } /** - * Run a harvest for an individual harvesting Dataverse + * Run a harvest for an individual harvesting client * @param dataverseRequest * @param harvestingClientId * @throws IOException @@ -141,12 +145,9 @@ public void doHarvest(DataverseRequest dataverseRequest, Long harvestingClientId if (harvestingClientConfig == null) { throw new IOException("No such harvesting client: id="+harvestingClientId); } - - Dataverse harvestingDataverse = harvestingClientConfig.getDataverse(); - - MutableBoolean harvestErrorOccurred = new MutableBoolean(false); + String logTimestamp = logFormatter.format(new Date()); - Logger hdLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean." + harvestingDataverse.getAlias() + logTimestamp); + Logger hdLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean." 
+ harvestingClientConfig.getName() + logTimestamp); String logFileName = "../logs" + File.separator + "harvest_" + harvestingClientConfig.getName() + "_" + logTimestamp + ".log"; FileHandler fileHandler = new FileHandler(logFileName); hdLogger.setUseParentHandlers(false); @@ -155,21 +156,15 @@ public void doHarvest(DataverseRequest dataverseRequest, Long harvestingClientId PrintWriter importCleanupLog = new PrintWriter(new FileWriter( "../logs/harvest_cleanup_" + harvestingClientConfig.getName() + "_" + logTimestamp+".txt")); - List harvestedDatasetIds = null; - - List harvestedDatasetIdsThisBatch = new ArrayList(); - - List failedIdentifiers = new ArrayList(); - List deletedIdentifiers = new ArrayList(); + List harvestedDatasetIds = new ArrayList<>(); + List failedIdentifiers = new ArrayList<>(); + List deletedIdentifiers = new ArrayList<>(); Date harvestStartTime = new Date(); try { - boolean harvestingNow = harvestingClientConfig.isHarvestingNow(); - - if (harvestingNow) { - harvestErrorOccurred.setValue(true); - hdLogger.log(Level.SEVERE, "Cannot begin harvesting, Dataverse " + harvestingDataverse.getName() + " is currently being harvested."); + if (harvestingClientConfig.isHarvestingNow()) { + hdLogger.log(Level.SEVERE, "Cannot start harvest, client " + harvestingClientConfig.getName() + " is already harvesting."); } else { harvestingClientService.resetHarvestInProgress(harvestingClientId); @@ -177,7 +172,7 @@ public void doHarvest(DataverseRequest dataverseRequest, Long harvestingClientId if (harvestingClientConfig.isOai()) { - harvestedDatasetIds = harvestOAI(dataverseRequest, harvestingClientConfig, hdLogger, importCleanupLog, harvestErrorOccurred, failedIdentifiers, deletedIdentifiers, harvestedDatasetIdsThisBatch); + harvestOAI(dataverseRequest, harvestingClientConfig, hdLogger, importCleanupLog, failedIdentifiers, deletedIdentifiers, harvestedDatasetIds); } else { throw new IOException("Unsupported harvest type"); @@ -187,18 +182,17 @@ public void 
doHarvest(DataverseRequest dataverseRequest, Long harvestingClientId hdLogger.log(Level.INFO, "Datasets created/updated: " + harvestedDatasetIds.size() + ", datasets deleted: " + deletedIdentifiers.size() + ", datasets failed: " + failedIdentifiers.size()); } + } catch (StopHarvestException she) { + hdLogger.log(Level.INFO, "HARVEST INTERRUPTED BY EXTERNAL REQUEST"); + harvestingClientService.setPartiallyCompleted(harvestingClientId, new Date(), harvestedDatasetIds.size(), failedIdentifiers.size(), deletedIdentifiers.size()); } catch (Throwable e) { - harvestErrorOccurred.setValue(true); + // Any other exception should be treated as a complete failure String message = "Exception processing harvest, server= " + harvestingClientConfig.getHarvestingUrl() + ",format=" + harvestingClientConfig.getMetadataPrefix() + " " + e.getClass().getName() + " " + e.getMessage(); hdLogger.log(Level.SEVERE, message); logException(e, hdLogger); hdLogger.log(Level.INFO, "HARVEST NOT COMPLETED DUE TO UNEXPECTED ERROR."); - // TODO: - // even though this harvesting run failed, we may have had successfully - // processed some number of datasets, by the time the exception was thrown. - // We should record that number too. And the number of the datasets that - // had failed, that we may have counted. -- L.A. 4.4 - harvestingClientService.setHarvestFailure(harvestingClientId, new Date()); + + harvestingClientService.setHarvestFailure(harvestingClientId, new Date(), harvestedDatasetIds.size(), failedIdentifiers.size(), deletedIdentifiers.size()); } finally { harvestingClientService.resetHarvestInProgress(harvestingClientId); @@ -215,12 +209,11 @@ public void doHarvest(DataverseRequest dataverseRequest, Long harvestingClientId * @param harvestErrorOccurred have we encountered any errors during harvest? 
* @param failedIdentifiers Study Identifiers for failed "GetRecord" requests */ - private List harvestOAI(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, Logger hdLogger, PrintWriter importCleanupLog, MutableBoolean harvestErrorOccurred, List failedIdentifiers, List deletedIdentifiers, List harvestedDatasetIdsThisBatch) - throws IOException, ParserConfigurationException, SAXException, TransformerException { + private void harvestOAI(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, Logger hdLogger, PrintWriter importCleanupLog, List failedIdentifiers, List deletedIdentifiers, List harvestedDatasetIds) + throws IOException, ParserConfigurationException, SAXException, TransformerException, StopHarvestException { logBeginOaiHarvest(hdLogger, harvestingClient); - List harvestedDatasetIds = new ArrayList(); OaiHandler oaiHandler; HttpClient httpClient = null; @@ -235,14 +228,16 @@ private List harvestOAI(DataverseRequest dataverseRequest, HarvestingClien throw new IOException(errorMessage); } - if (DATAVERSE_PROPRIETARY_METADATA_FORMAT.equals(oaiHandler.getMetadataPrefix())) { - // If we are harvesting native Dataverse json, we'll also need this - // jdk http client to make direct calls to the remote Dataverse API: - httpClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build(); - } + // We will use this jdk http client to make direct calls to the remote + // OAI (or remote Dataverse API) to obtain the metadata records + httpClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build(); try { for (Iterator
idIter = oaiHandler.runListIdentifiers(); idIter.hasNext();) { + // Before each iteration, check if this harvesting job needs to be aborted: + if (checkIfStoppingJob(harvestingClient)) { + throw new StopHarvestException("Harvesting stopped by external request"); + } Header h = idIter.next(); String identifier = h.getIdentifier(); @@ -265,18 +260,11 @@ private List harvestOAI(DataverseRequest dataverseRequest, HarvestingClien if (datasetId != null) { harvestedDatasetIds.add(datasetId); - - if ( harvestedDatasetIdsThisBatch == null ) { - harvestedDatasetIdsThisBatch = new ArrayList(); - } - harvestedDatasetIdsThisBatch.add(datasetId); - } if (getRecordErrorOccurred.booleanValue() == true) { failedIdentifiers.add(identifier); - harvestErrorOccurred.setValue(true); - //temporary: + //can be uncommented out for testing failure handling: //throw new IOException("Exception occured, stopping harvest"); } } @@ -286,8 +274,6 @@ private List harvestOAI(DataverseRequest dataverseRequest, HarvestingClien logCompletedOaiHarvest(hdLogger, harvestingClient); - return harvestedDatasetIds; - } private Long processRecord(DataverseRequest dataverseRequest, Logger hdLogger, PrintWriter importCleanupLog, OaiHandler oaiHandler, String identifier, MutableBoolean recordErrorOccurred, List deletedIdentifiers, Date dateStamp, HttpClient httpClient) { @@ -303,11 +289,11 @@ private Long processRecord(DataverseRequest dataverseRequest, Logger hdLogger, P // Make direct call to obtain the proprietary Dataverse metadata // in JSON from the remote Dataverse server: String metadataApiUrl = oaiHandler.getProprietaryDataverseMetadataURL(identifier); - logger.info("calling "+metadataApiUrl); + logger.fine("calling "+metadataApiUrl); tempFile = retrieveProprietaryDataverseMetadata(httpClient, metadataApiUrl); } else { - FastGetRecord record = oaiHandler.runGetRecord(identifier); + FastGetRecord record = oaiHandler.runGetRecord(identifier, httpClient); errMessage = record.getErrorMessage(); deleted = 
record.isDeleted(); tempFile = record.getMetadataFile(); @@ -372,7 +358,7 @@ File retrieveProprietaryDataverseMetadata (HttpClient client, String remoteApiUr HttpRequest request = HttpRequest.newBuilder() .uri(URI.create(remoteApiUrl)) .GET() - .header("User-Agent", "DataverseHarvester/6.0") + .header("User-Agent", "XOAI Service Provider v5 (Dataverse)") .build(); HttpResponse response; @@ -410,6 +396,26 @@ private void deleteHarvestedDatasetIfExists(String persistentIdentifier, Dataver } hdLogger.info("No dataset found for " + persistentIdentifier + ", skipping delete. "); } + + private boolean checkIfStoppingJob(HarvestingClient harvestingClient) { + Long pid = ProcessHandle.current().pid(); + String stopFileName = DATAVERSE_HARVEST_STOP_FILE + harvestingClient.getName() + "." + pid; + Path stopFilePath = Paths.get(stopFileName); + + if (Files.exists(stopFilePath)) { + // Now that we know that the file is there, let's (try to) delete it, + // so that the harvest can be re-run. + try { + Files.delete(stopFilePath); + } catch (IOException ioex) { + // No need to treat this is a big deal (could be a permission, etc.) 
+ logger.warning("Failed to delete the flag file "+stopFileName + "; check permissions and delete manually."); + } + return true; + } + + return false; + } private void logBeginOaiHarvest(Logger hdLogger, HarvestingClient harvestingClient) { hdLogger.log(Level.INFO, "BEGIN HARVEST, oaiUrl=" diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index 32365e17852..d27ddc41b7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -188,7 +188,9 @@ public String getHarvestingUrl() { } public void setHarvestingUrl(String harvestingUrl) { - this.harvestingUrl = harvestingUrl.trim(); + if (harvestingUrl != null) { + this.harvestingUrl = harvestingUrl.trim(); + } } private String archiveUrl; @@ -232,6 +234,16 @@ public void setMetadataPrefix(String metadataPrefix) { this.metadataPrefix = metadataPrefix; } + private String customHttpHeaders; + + public String getCustomHttpHeaders() { + return customHttpHeaders; + } + + public void setCustomHttpHeaders(String customHttpHeaders) { + this.customHttpHeaders = customHttpHeaders; + } + // TODO: do we need "orphanRemoval=true"? -- L.A. 4.4 // TODO: should it be @OrderBy("startTime")? -- L.A. 4.4 @OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) @@ -343,95 +355,7 @@ public Long getLastDeletedDatasetCount() { return lastNonEmptyHarvest.getDeletedDatasetCount(); } return null; - } - - /* move the fields below to the new HarvestingClientRun class: - private String harvestResult; - - public String getResult() { - return harvestResult; - } - - public void setResult(String harvestResult) { - this.harvestResult = harvestResult; - } - - // "Last Harvest Time" is the last time we *attempted* to harvest - // from this remote resource. 
- // It wasn't necessarily a successful attempt! - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastHarvestTime; - - public Date getLastHarvestTime() { - return lastHarvestTime; - } - - public void setLastHarvestTime(Date lastHarvestTime) { - this.lastHarvestTime = lastHarvestTime; - } - - // This is the last "successful harvest" - i.e., the last time we - // tried to harvest, and got a response from the remote server. - // We may not have necessarily harvested any useful content though; - // the result may have been a "no content" or "no changes since the last harvest" - // response. - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastSuccessfulHarvestTime; - - public Date getLastSuccessfulHarvestTime() { - return lastSuccessfulHarvestTime; - } - - public void setLastSuccessfulHarvestTime(Date lastSuccessfulHarvestTime) { - this.lastSuccessfulHarvestTime = lastSuccessfulHarvestTime; - } - - // Finally, this is the time stamp from the last "non-empty" harvest. - // I.e. 
the last time we ran a harvest that actually resulted in - // some Datasets created, updated or deleted: - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastNonEmptyHarvestTime; - - public Date getLastNonEmptyHarvestTime() { - return lastNonEmptyHarvestTime; - } - - public void setLastNonEmptyHarvestTime(Date lastNonEmptyHarvestTime) { - this.lastNonEmptyHarvestTime = lastNonEmptyHarvestTime; - } - - // And these are the Dataset counts from that last "non-empty" harvest: - private Long harvestedDatasetCount; - private Long failedDatasetCount; - private Long deletedDatasetCount; - - public Long getLastHarvestedDatasetCount() { - return harvestedDatasetCount; - } - - public void setHarvestedDatasetCount(Long harvestedDatasetCount) { - this.harvestedDatasetCount = harvestedDatasetCount; - } - - public Long getLastFailedDatasetCount() { - return failedDatasetCount; - } - - public void setFailedDatasetCount(Long failedDatasetCount) { - this.failedDatasetCount = failedDatasetCount; - } - - public Long getLastDeletedDatasetCount() { - return deletedDatasetCount; - } - - public void setDeletedDatasetCount(Long deletedDatasetCount) { - this.deletedDatasetCount = deletedDatasetCount; - } - */ + } private boolean scheduled; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java index 0af73550190..13cc44ce919 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java @@ -167,28 +167,20 @@ public void deleteClient(Long clientId) { @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void setHarvestSuccess(Long hcId, Date currentTime, int harvestedCount, int failedCount, int deletedCount) { - HarvestingClient harvestingClient = em.find(HarvestingClient.class, hcId); - if (harvestingClient 
== null) { - return; - } - em.refresh(harvestingClient); - - ClientHarvestRun currentRun = harvestingClient.getLastRun(); - - if (currentRun != null && currentRun.isInProgress()) { - // TODO: what if there's no current run in progress? should we just - // give up quietly, or should we make a noise of some kind? -- L.A. 4.4 - - currentRun.setSuccess(); - currentRun.setFinishTime(currentTime); - currentRun.setHarvestedDatasetCount(new Long(harvestedCount)); - currentRun.setFailedDatasetCount(new Long(failedCount)); - currentRun.setDeletedDatasetCount(new Long(deletedCount)); - } + recordHarvestJobStatus(hcId, currentTime, harvestedCount, failedCount, deletedCount, ClientHarvestRun.RunResultType.SUCCESS); } @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void setHarvestFailure(Long hcId, Date currentTime) { + public void setHarvestFailure(Long hcId, Date currentTime, int harvestedCount, int failedCount, int deletedCount) { + recordHarvestJobStatus(hcId, currentTime, harvestedCount, failedCount, deletedCount, ClientHarvestRun.RunResultType.FAILURE); + } + + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public void setPartiallyCompleted(Long hcId, Date finishTime, int harvestedCount, int failedCount, int deletedCount) { + recordHarvestJobStatus(hcId, finishTime, harvestedCount, failedCount, deletedCount, ClientHarvestRun.RunResultType.INTERRUPTED); + } + + public void recordHarvestJobStatus(Long hcId, Date finishTime, int harvestedCount, int failedCount, int deletedCount, ClientHarvestRun.RunResultType result) { HarvestingClient harvestingClient = em.find(HarvestingClient.class, hcId); if (harvestingClient == null) { return; @@ -198,28 +190,40 @@ public void setHarvestFailure(Long hcId, Date currentTime) { ClientHarvestRun currentRun = harvestingClient.getLastRun(); if (currentRun != null && currentRun.isInProgress()) { - // TODO: what if there's no current run in progress? 
should we just - // give up quietly, or should we make a noise of some kind? -- L.A. 4.4 - currentRun.setFailed(); - currentRun.setFinishTime(currentTime); + currentRun.setResult(result); + currentRun.setFinishTime(finishTime); + currentRun.setHarvestedDatasetCount(Long.valueOf(harvestedCount)); + currentRun.setFailedDatasetCount(Long.valueOf(failedCount)); + currentRun.setDeletedDatasetCount(Long.valueOf(deletedCount)); } - } + } + + public Long getNumberOfHarvestedDatasetsByAllClients() { + try { + return (Long) em.createNativeQuery("SELECT count(d.id) FROM dataset d " + + " WHERE d.harvestingclient_id IS NOT NULL").getSingleResult(); + + } catch (Exception ex) { + logger.info("Warning: exception looking up the total number of harvested datasets: " + ex.getMessage()); + return 0L; + } + } public Long getNumberOfHarvestedDatasetByClients(List clients) { - String dvs = null; + String clientIds = null; for (HarvestingClient client: clients) { - if (dvs == null) { - dvs = client.getDataverse().getId().toString(); + if (clientIds == null) { + clientIds = client.getId().toString(); } else { - dvs = dvs.concat(","+client.getDataverse().getId().toString()); + clientIds = clientIds.concat(","+client.getId().toString()); } } try { - return (Long) em.createNativeQuery("SELECT count(d.id) FROM dataset d, " - + " dvobject o WHERE d.id = o.id AND o.owner_id in (" - + dvs + ")").getSingleResult(); + return (Long) em.createNativeQuery("SELECT count(d.id) FROM dataset d " + + " WHERE d.harvestingclient_id in (" + + clientIds + ")").getSingleResult(); } catch (Exception ex) { logger.info("Warning: exception trying to count harvested datasets by clients: " + ex.getMessage()); diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/StopHarvestException.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/StopHarvestException.java new file mode 100644 index 00000000000..dffa2dd0385 --- /dev/null +++ 
b/src/main/java/edu/harvard/iq/dataverse/harvest/client/StopHarvestException.java @@ -0,0 +1,17 @@ +package edu.harvard.iq.dataverse.harvest.client; + +/** + * + * @author landreev + */ + +public class StopHarvestException extends Exception { + public StopHarvestException(String message) { + super(message); + } + + public StopHarvestException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index c0a039e2d2b..bb3dc06972c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -5,7 +5,6 @@ import io.gdcc.xoai.model.oaipmh.results.MetadataFormat; import io.gdcc.xoai.model.oaipmh.results.Set; import io.gdcc.xoai.serviceprovider.ServiceProvider; -import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient; import io.gdcc.xoai.serviceprovider.exceptions.BadArgumentException; import io.gdcc.xoai.serviceprovider.exceptions.InvalidOAIResponse; import io.gdcc.xoai.serviceprovider.exceptions.NoSetHierarchyException; @@ -15,8 +14,10 @@ import edu.harvard.iq.dataverse.harvest.client.FastGetRecord; import static edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.DATAVERSE_PROPRIETARY_METADATA_API; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient; import java.io.IOException; import java.io.Serializable; +import java.net.http.HttpClient; import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.lang3.StringUtils; @@ -24,14 +25,18 @@ import javax.xml.transform.TransformerException; import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.logging.Logger; /** * * @author 
Leonid Andreev */ public class OaiHandler implements Serializable { + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler"); public OaiHandler() { @@ -65,6 +70,8 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException this.fromDate = harvestingClient.getLastNonEmptyHarvestTime(); + this.customHeaders = makeCustomHeaders(harvestingClient.getCustomHttpHeaders()); + this.harvestingClient = harvestingClient; } @@ -74,6 +81,7 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException private String setName; private Date fromDate; private Boolean setListTruncated = false; + private Map customHeaders = null; private ServiceProvider serviceProvider; @@ -119,6 +127,14 @@ public boolean isSetListTruncated() { return setListTruncated; } + public Map getCustomHeaders() { + return this.customHeaders; + } + + public void setCustomHeaders(Map customHeaders) { + this.customHeaders = customHeaders; + } + public ServiceProvider getServiceProvider() throws OaiHandlerException { if (serviceProvider == null) { if (baseOaiUrl == null) { @@ -128,8 +144,15 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException { context.withBaseUrl(baseOaiUrl); context.withGranularity(Granularity.Second); - // builds the client with the default parameters and the JDK http client: - context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build()); + + JdkHttpOaiClient.Builder xoaiClientBuilder = JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl()); + if (getCustomHeaders() != null) { + for (String headerName : getCustomHeaders().keySet()) { + logger.fine("adding custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName)); + } + xoaiClientBuilder = xoaiClientBuilder.withCustomHeaders(getCustomHeaders()); + } + context.withOAIClient(xoaiClientBuilder.build()); serviceProvider = new ServiceProvider(context); } @@ -235,7 +258,7 @@ public 
Iterator<Header>
runListIdentifiers() throws OaiHandlerException { } - public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException { + public FastGetRecord runGetRecord(String identifier, HttpClient httpClient) throws OaiHandlerException { if (StringUtils.isEmpty(this.baseOaiUrl)) { throw new OaiHandlerException("Attempted to execute GetRecord without server URL specified."); } @@ -244,7 +267,7 @@ public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException } try { - return new FastGetRecord(this.baseOaiUrl, identifier, this.metadataPrefix); + return new FastGetRecord(this, identifier, httpClient); } catch (ParserConfigurationException pce) { throw new OaiHandlerException("ParserConfigurationException executing GetRecord: "+pce.getMessage()); } catch (SAXException se) { @@ -293,4 +316,28 @@ public void runIdentify() { // (we will need it, both for validating the remote server, // and to learn about its extended capabilities) } + + public Map makeCustomHeaders(String headersString) { + if (headersString != null) { + String[] parts = headersString.split("\\\\n"); + HashMap ret = new HashMap<>(); + logger.info("found "+parts.length+" parts"); + int count = 0; + for (int i = 0; i < parts.length; i++) { + if (parts[i].indexOf(':') > 0) { + String headerName = parts[i].substring(0, parts[i].indexOf(':')); + String headerValue = parts[i].substring(parts[i].indexOf(':')+1).strip(); + + ret.put(headerName, headerValue); + count++; + } + // simply skipping it if malformed; or we could throw an exception - ? 
+ } + if (ret.size() > 0) { + logger.info("returning the array with "+ret.size()+" name/value pairs"); + return ret; + } + } + return null; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java index 6cdc4e5c277..5a8f2f41d31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java @@ -375,4 +375,16 @@ public List findDeletedOaiRecordsBySetName(String setName) { } } + public Instant getEarliestDate() { + String queryString = "SELECT min(r.lastUpdateTime) FROM OAIRecord r"; + TypedQuery query = em.createQuery(queryString, Date.class); + Date retDate = query.getSingleResult(); + if (retDate != null) { + return retDate.toInstant(); + } + + // if there are no records yet, return the default "now" + return new Date().toInstant(); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 5eacb1addb6..8840d433ae1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -31,10 +31,13 @@ import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import io.gdcc.xoai.exceptions.OAIException; +import io.gdcc.xoai.model.oaipmh.Granularity; +import io.gdcc.xoai.services.impl.SimpleResumptionTokenFormat; import org.apache.commons.lang3.StringUtils; import java.io.IOException; +import java.time.Instant; import java.util.logging.Logger; import javax.ejb.EJB; import javax.inject.Inject; @@ -96,9 +99,15 @@ public class OAIServlet extends HttpServlet { // be calling ListIdentifiers, and then making direct calls to the export // API of the 
remote Dataverse, to obtain the records in native json. This // is how we should have implemented this in the first place, really. + /* + SEK + per #3621 we are adding urls to the namespace and schema + These will not resolve presently. the change is so that the + xml produced by https://demo.dataverse.org/oai?verb=ListMetadataFormats will validate + */ private static final String DATAVERSE_EXTENDED_METADATA_FORMAT = "dataverse_json"; - private static final String DATAVERSE_EXTENDED_METADATA_NAMESPACE = "Custom Dataverse metadata in JSON format (Dataverse4 to Dataverse4 harvesting only)"; - private static final String DATAVERSE_EXTENDED_METADATA_SCHEMA = "JSON schema pending"; + private static final String DATAVERSE_EXTENDED_METADATA_NAMESPACE = "https://dataverse.org/schema/core"; + private static final String DATAVERSE_EXTENDED_METADATA_SCHEMA = "https://dataverse.org/schema/core.xsd"; private Context xoaiContext; private SetRepository setRepository; @@ -117,14 +126,13 @@ public void init(ServletConfig config) throws ServletException { } setRepository = new DataverseXoaiSetRepository(setService); - itemRepository = new DataverseXoaiItemRepository(recordService, datasetService, systemConfig.getDataverseSiteUrl()); + itemRepository = new DataverseXoaiItemRepository(recordService, datasetService, SystemConfig.getDataverseSiteUrlStatic()); repositoryConfiguration = createRepositoryConfiguration(); - xoaiRepository = new Repository() + xoaiRepository = new Repository(repositoryConfiguration) .withSetRepository(setRepository) - .withItemRepository(itemRepository) - .withConfiguration(repositoryConfiguration); + .withItemRepository(itemRepository); dataProvider = new DataProvider(getXoaiContext(), getXoaiRepository()); } @@ -187,23 +195,30 @@ private RepositoryConfiguration createRepositoryConfiguration() { } // The admin email address associated with this installation: // (Note: if the setting does not exist, we are going to assume that they - // have a reason not to 
want to advertise their email address, so no - // email will be shown in the output of Identify. + // have a reason not to want to configure their email address, if it is + // a developer's instance, for example; or a reason not to want to + // advertise it to the world.) InternetAddress systemEmailAddress = MailUtil.parseSystemAddress(settingsService.getValueForKey(SettingsServiceBean.Key.SystemEmail)); - - RepositoryConfiguration repositoryConfiguration = RepositoryConfiguration.defaults() - .withEnableMetadataAttributes(true) - .withRepositoryName(repositoryName) - .withBaseUrl(systemConfig.getDataverseSiteUrl()+"/oai") + String systemEmailLabel = systemEmailAddress != null ? systemEmailAddress.getAddress() : "donotreply@localhost"; + + RepositoryConfiguration configuration = new RepositoryConfiguration.RepositoryConfigurationBuilder() + .withAdminEmail(systemEmailLabel) .withCompression("gzip") .withCompression("deflate") - .withAdminEmail(systemEmailAddress != null ? systemEmailAddress.getAddress() : null) - .withDeleteMethod(DeletedRecord.TRANSIENT) + .withGranularity(Granularity.Lenient) + .withResumptionTokenFormat(new SimpleResumptionTokenFormat().withGranularity(Granularity.Second)) + .withRepositoryName(repositoryName) + .withBaseUrl(systemConfig.getDataverseSiteUrl()+"/oai") + .withEarliestDate(recordService.getEarliestDate()) .withMaxListIdentifiers(maxListIdentifiers) + .withMaxListSets(maxListSets) .withMaxListRecords(maxListRecords) - .withMaxListSets(maxListSets); + .withDeleteMethod(DeletedRecord.TRANSIENT) + .withEnableMetadataAttributes(true) + .withRequireFromAfterEarliest(false) + .build(); - return repositoryConfiguration; + return configuration; } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java index faf3cf9ddc4..147d42648fa 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java @@ -49,7 +49,7 @@ public DataverseXoaiItemRepository (OAIRecordServiceBean recordService, DatasetS } @Override - public ItemIdentifier getItem(String identifier) throws IdDoesNotExistException { + public ItemIdentifier getItemIdentifier(String identifier) throws IdDoesNotExistException { // This method is called when ListMetadataFormats request specifies // the identifier, requesting the formats available for this specific record. // In our case, under the current implementation, we need to simply look diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index b03bae618a4..9c6acd964c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -20,6 +20,8 @@ package edu.harvard.iq.dataverse.ingest; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.datavariable.VariableCategory; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; @@ -72,6 +74,7 @@ //import edu.harvard.iq.dvn.unf.*; import org.dataverse.unf.*; import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -81,6 +84,7 @@ import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; @@ -113,6 +117,9 @@ import javax.jms.QueueSession; import javax.jms.Message; import 
javax.faces.application.FacesMessage; +import javax.ws.rs.core.MediaType; +import ucar.nc2.NetcdfFile; +import ucar.nc2.NetcdfFiles; /** * @@ -134,6 +141,8 @@ public class IngestServiceBean { @EJB DataFileServiceBean fileService; @EJB + AuxiliaryFileServiceBean auxiliaryFileService; + @EJB SystemConfig systemConfig; @Resource(lookup = "java:app/jms/queue/ingest") @@ -232,6 +241,9 @@ public List saveAndAddFilesToDataset(DatasetVersion version, savedSuccess = true; logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); + // TODO: reformat this file to remove the many tabs added in cc08330 + extractMetadataNcml(dataFile, tempLocationPath); + } catch (IOException ioex) { logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); } finally { @@ -343,6 +355,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, try { // FITS is the only type supported for metadata // extraction, as of now. -- L.A. 4.0 + // Note that extractMetadataNcml() is used for NetCDF/HDF5. dataFile.setContentType("application/fits"); metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); } catch (IOException mex) { @@ -565,7 +578,6 @@ public int compare(DataFile d1, DataFile d2) { return sb.toString(); } - public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { /* logger.info("Skipping summary statistics and UNF."); @@ -1206,7 +1218,104 @@ public boolean extractMetadata(String tempFileLocation, DataFile dataFile, Datas return ingestSuccessful; } - + /** + * @param dataFile The DataFile from which to attempt NcML extraction + * (NetCDF or HDF5 format) + * @param tempLocationPath Null if the file is already saved to permanent + * storage. Otherwise, the path to the temp location of the files, as during + * initial upload. + * @return True if the NcML file was created. False on any error or if the + * NcML file already exists.
+ */ + public boolean extractMetadataNcml(DataFile dataFile, Path tempLocationPath) { + boolean ncmlFileCreated = false; + logger.fine("extractMetadataNcml: dataFileIn: " + dataFile + ". tempLocationPath: " + tempLocationPath); + InputStream inputStream = null; + String dataFileLocation = null; + if (tempLocationPath != null) { + // This file was just uploaded and hasn't been saved to S3 or local storage. + dataFileLocation = tempLocationPath.toString(); + } else { + // This file is already on S3 or local storage. + File tempFile = null; + File localFile; + StorageIO storageIO; + try { + storageIO = dataFile.getStorageIO(); + storageIO.open(); + if (storageIO.isLocalFile()) { + localFile = storageIO.getFileSystemPath().toFile(); + dataFileLocation = localFile.getAbsolutePath(); + logger.fine("extractMetadataNcml: file is local. Path: " + dataFileLocation); + } else { + // Need to create a temporary local file: + tempFile = File.createTempFile("tempFileExtractMetadataNcml", ".tmp"); + try ( ReadableByteChannel targetFileChannel = (ReadableByteChannel) storageIO.getReadChannel(); FileChannel tempFileChannel = new FileOutputStream(tempFile).getChannel();) { + tempFileChannel.transferFrom(targetFileChannel, 0, storageIO.getSize()); + } + dataFileLocation = tempFile.getAbsolutePath(); + logger.fine("extractMetadataNcml: file is on S3. Downloaded and saved to temp path: " + dataFileLocation); + } + } catch (IOException ex) { + logger.info("While attempting to extract NcML, could not use storageIO for data file id " + dataFile.getId() + ". Exception: " + ex); + } + } + if (dataFileLocation != null) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(dataFileLocation)) { + logger.fine("trying to open " + dataFileLocation); + if (netcdfFile != null) { + // For now, empty string. What should we pass as a URL to toNcml()? 
The filename (including the path) most commonly at https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_cookbook.html + // With an empty string the XML will show 'location="file:"'. + String ncml = netcdfFile.toNcml(""); + inputStream = new ByteArrayInputStream(ncml.getBytes(StandardCharsets.UTF_8)); + } else { + logger.info("NetcdfFiles.open() could not open file id " + dataFile.getId() + " (null returned)."); + } + } catch (IOException ex) { + logger.info("NetcdfFiles.open() could not open file id " + dataFile.getId() + ". Exception caught: " + ex); + } + } else { + logger.info("dataFileLocation is null for file id " + dataFile.getId() + ". Can't extract NcML."); + } + if (inputStream != null) { + // If you change NcML, you must also change the previewer. + String formatTag = "NcML"; + // 0.1 is arbitrary. It's our first attempt to put out NcML so we're giving it a low number. + // If you bump the number here, be sure to bump the number in the previewer as well. + // We could use 2.2 here since that's the current version of NcML. + String formatVersion = "0.1"; + String origin = "netcdf-java"; + boolean isPublic = true; + // See also file.auxfiles.types.NcML in Bundle.properties. Used to group aux files in UI. + String type = "NcML"; + // XML because NcML doesn't have its own MIME/content type at https://www.iana.org/assignments/media-types/media-types.xhtml + MediaType mediaType = new MediaType("text", "xml"); + try { + // Let the cascade do the save if the file isn't yet on permanent storage.
+ boolean callSave = false; + if (tempLocationPath == null) { + callSave = true; + // Check for an existing NcML file + logger.fine("Checking for existing NcML aux file for file id " + dataFile.getId()); + AuxiliaryFile existingAuxiliaryFile = auxiliaryFileService.lookupAuxiliaryFile(dataFile, formatTag, formatVersion); + if (existingAuxiliaryFile != null) { + logger.fine("Aux file already exists for NetCDF/HDF5 file for file id " + dataFile.getId()); + return false; + } + } + AuxiliaryFile auxFile = auxiliaryFileService.processAuxiliaryFile(inputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType, callSave); + logger.fine("Aux file extracted from NetCDF/HDF5 file saved to storage (but not to the database yet) from file id " + dataFile.getId()); + ncmlFileCreated = true; + } catch (Exception ex) { + logger.info("exception throw calling processAuxiliaryFile: " + ex); + } + } else { + logger.info("extractMetadataNcml: input stream is null! dataFileLocation was " + dataFileLocation); + } + + return ncmlFileCreated; + } + private void processDatasetMetadata(FileMetadataIngest fileMetadataIngest, DatasetVersion editVersion) throws IOException { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index c2899b29d1f..6d17a5bd553 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -31,6 +31,7 @@ import javax.inject.Inject; // Rosuda Wrappers and Methods for R-calls to Rserve +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.rosuda.REngine.REXP; import org.rosuda.REngine.REXPMismatchException; import org.rosuda.REngine.RList; @@ -88,10 +89,10 @@ public class RDATAFileReader extends TabularDataFileReader { static 
private String RSCRIPT_WRITE_DVN_TABLE = ""; // RServe static variables - private static String RSERVE_HOST = System.getProperty("dataverse.rserve.host"); - private static String RSERVE_USER = System.getProperty("dataverse.rserve.user"); - private static String RSERVE_PASSWORD = System.getProperty("dataverse.rserve.password"); - private static int RSERVE_PORT; + private final String RSERVE_HOST; + private final int RSERVE_PORT; + private final String RSERVE_USER; + private final String RSERVE_PASSWORD; // TODO: // we're not using these time/data formats for anything, are we? @@ -138,24 +139,6 @@ public class RDATAFileReader extends TabularDataFileReader { * This is primarily to construct the R-Script */ static { - /* - * Set defaults fallbacks for class properties - */ - if (RSERVE_HOST == null) - RSERVE_HOST = "localhost"; - - if (RSERVE_USER == null) - RSERVE_USER = "rserve"; - - if (RSERVE_PASSWORD == null) - RSERVE_PASSWORD = "rserve"; - - if (System.getProperty("dataverse.ingest.rserve.port") == null) - RSERVE_PORT = 6311; - else - RSERVE_PORT = Integer.parseInt(System.getProperty("dataverse.rserve.port")); - - // Load R Scripts into memory, so that we can run them via R-serve RSCRIPT_WRITE_DVN_TABLE = readLocalResource("scripts/write.table.R"); RSCRIPT_GET_DATASET = readLocalResource("scripts/get.dataset.R"); @@ -451,7 +434,20 @@ public RDATAFileReader(TabularDataFileReaderSpi originator) { super(originator); - + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin. Every time a file would be read with this file reader, + // a new reader will be created, reading from the cached config source settings with minimal overhead. 
+ this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup(); + int port; + try { + port = JvmSettings.RSERVE_PORT.lookup(Integer.class); + } catch (IllegalArgumentException e) { + LOG.log(Level.SEVERE, "Could not parse value for " + JvmSettings.RSERVE_PORT.getScopedKey() + ", defaulting to 6311", e); + port = 6311; + } + this.RSERVE_PORT = port; + this.RSERVE_USER = JvmSettings.RSERVE_USER.lookup(); + this.RSERVE_PASSWORD = JvmSettings.RSERVE_PASSWORD.lookup(); LOG.fine("RDATAFileReader: INSIDE RDATAFileReader"); diff --git a/src/main/java/edu/harvard/iq/dataverse/license/License.java b/src/main/java/edu/harvard/iq/dataverse/license/License.java index 96baacc6731..c6e2cdbc2e5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/license/License.java +++ b/src/main/java/edu/harvard/iq/dataverse/license/License.java @@ -23,9 +23,9 @@ */ @NamedQueries({ @NamedQuery( name="License.findAll", - query="SELECT l FROM License l ORDER BY (case when l.isDefault then 0 else 1 end), l.id asc"), + query="SELECT l FROM License l ORDER BY (case when l.isDefault then 0 else 1 end), l.sortOrder, l.id asc"), @NamedQuery( name="License.findAllActive", - query="SELECT l FROM License l WHERE l.active='true' ORDER BY (case when l.isDefault then 0 else 1 end), l.id asc"), + query="SELECT l FROM License l WHERE l.active='true' ORDER BY (case when l.isDefault then 0 else 1 end), l.sortOrder, l.id asc"), @NamedQuery( name="License.findById", query = "SELECT l FROM License l WHERE l.id=:id"), @NamedQuery( name="License.findDefault", @@ -42,6 +42,8 @@ query = "UPDATE License l SET l.isDefault='false'"), @NamedQuery( name="License.setActiveState", query = "UPDATE License l SET l.active=:state WHERE l.id=:id"), + @NamedQuery( name="License.setSortOrder", + query = "UPDATE License l SET l.sortOrder=:sortOrder WHERE l.id=:id"), }) @Entity @@ -73,6 +75,9 @@ public class License { @Column(nullable = false) private boolean isDefault; + + @Column(nullable = false, columnDefinition = "BIGINT NOT NULL DEFAULT 
0") + private Long sortOrder; @OneToMany(mappedBy="license") private List termsOfUseAndAccess; @@ -80,7 +85,7 @@ public class License { public License() { } - public License(String name, String shortDescription, URI uri, URI iconUrl, boolean active) { + public License(String name, String shortDescription, URI uri, URI iconUrl, boolean active, Long sortOrder) { this.name = name; this.shortDescription = shortDescription; this.uri = uri.toASCIIString(); @@ -91,6 +96,7 @@ public License(String name, String shortDescription, URI uri, URI iconUrl, boole } this.active = active; isDefault = false; + this.sortOrder = sortOrder; } public Long getId() { @@ -172,17 +178,26 @@ public void setTermsOfUseAndAccess(List termsOfUseAndAccess this.termsOfUseAndAccess = termsOfUseAndAccess; } + public Long getSortOrder() { + return sortOrder; + } + + public void setSortOrder(Long sortOrder) { + this.sortOrder = sortOrder; + } + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; License license = (License) o; - return active == license.active && id.equals(license.id) && name.equals(license.name) && shortDescription.equals(license.shortDescription) && uri.equals(license.uri) && Objects.equals(iconUrl, license.iconUrl); + return active == license.active && id.equals(license.id) && name.equals(license.name) && shortDescription.equals(license.shortDescription) && uri.equals(license.uri) && Objects.equals(iconUrl, license.iconUrl) + && Objects.equals(sortOrder, license.sortOrder); } @Override public int hashCode() { - return Objects.hash(id, name, shortDescription, uri, iconUrl, active); + return Objects.hash(id, name, shortDescription, uri, iconUrl, active, sortOrder); } @Override @@ -195,6 +210,7 @@ public String toString() { ", iconUrl=" + iconUrl + ", active=" + active + ", isDefault=" + isDefault + + ", sortOrder=" + sortOrder + '}'; } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/license/LicenseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/license/LicenseServiceBean.java index c18e168685a..b554fecd437 100644 --- a/src/main/java/edu/harvard/iq/dataverse/license/LicenseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/license/LicenseServiceBean.java @@ -93,11 +93,23 @@ public int setActive(Long id, boolean state) throws WrappedResponse { new IllegalArgumentException("License already " + (state ? "active" : "inactive")), null); } } + + public int setSortOrder(Long id, Long sortOrder) throws WrappedResponse { + License candidate = getById(id); + if (candidate == null) + return 0; + + return em.createNamedQuery("License.setSortOrder").setParameter("id", id).setParameter("sortOrder", sortOrder) + .executeUpdate(); + } public License save(License license) throws WrappedResponse { if (license.getId() != null) { throw new WrappedResponse(new IllegalArgumentException("There shouldn't be an ID in the request body"), null); } + if (license.getSortOrder() == null) { + throw new WrappedResponse(new IllegalArgumentException("There should be a sort order value in the request body"), null); + } try { em.persist(license); em.flush(); diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 0189faf6598..50c8c4098a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -513,7 +513,9 @@ public JsonArray fileDownloads(String yyyymm, Dataverse d, boolean uniqueCounts) for (Object[] result : results) { JsonObjectBuilder job = Json.createObjectBuilder(); job.add(MetricsUtil.ID, (int) result[0]); - job.add(MetricsUtil.PID, (String) result[1]); + if(result[1]!=null) { + job.add(MetricsUtil.PID, (String) result[1]); + } job.add(MetricsUtil.COUNT, (long) result[2]); jab.add(job); } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java index 90b61bcb29c..72d8f5402bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java @@ -227,7 +227,9 @@ public static JsonArray timeSeriesByIDAndPIDToJson(List results) { JsonObjectBuilder job = Json.createObjectBuilder(); job.add(MetricsUtil.DATE, date); job.add(ID, id); - job.add(PID, pids.get(id)); + if(pids.get(id)!=null) { + job.add(PID, pids.get(id)); + } job.add(COUNT, totals.get(id)); jab.add(job); } diff --git a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java index f13b6f11434..df2e44ecb27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java +++ b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java @@ -41,6 +41,7 @@ import java.util.Set; import java.util.logging.Logger; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.RandomStringUtils; @@ -72,57 +73,33 @@ public class RemoteDataFrameService { private static String TMP_TABDATA_FILE_EXT = ".tab"; private static String TMP_RDATA_FILE_EXT = ".RData"; - - private static String RSERVE_HOST = null; - private static String RSERVE_USER = null; - private static String RSERVE_PWD = null; - private static int RSERVE_PORT = -1; + + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin + private final String RSERVE_HOST; + private final String RSERVE_USER; + private final String RSERVE_PWD; + private final int RSERVE_PORT; + private final String RSERVE_TMP_DIR; private static String DATAVERSE_R_FUNCTIONS = "scripts/dataverse_r_functions.R"; private static String DATAVERSE_R_PREPROCESSING = 
"scripts/preprocess.R"; - - public static String LOCAL_TEMP_DIR = System.getProperty("java.io.tmpdir"); - public static String RSERVE_TMP_DIR=null; public String PID = null; public String tempFileNameIn = null; public String tempFileNameOut = null; - - static { - - RSERVE_TMP_DIR = System.getProperty("dataverse.rserve.tempdir"); - - if (RSERVE_TMP_DIR == null){ - RSERVE_TMP_DIR = "/tmp/"; - } - - RSERVE_HOST = System.getProperty("dataverse.rserve.host"); - if (RSERVE_HOST == null){ - RSERVE_HOST= "localhost"; - } - - RSERVE_USER = System.getProperty("dataverse.rserve.user"); - if (RSERVE_USER == null){ - RSERVE_USER= "rserve"; - } - - RSERVE_PWD = System.getProperty("dataverse.rserve.password"); - if (RSERVE_PWD == null){ - RSERVE_PWD= "rserve"; - } - - - if (System.getProperty("dataverse.rserve.port") == null ){ - RSERVE_PORT= 6311; - } else { - RSERVE_PORT = Integer.parseInt(System.getProperty("dataverse.rserve.port")); - } - - } - - public RemoteDataFrameService() { + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin. Config sources have their own caches, so adding + // these here means the setting can be changed dynamically without too much overhead. + this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup(); + this.RSERVE_USER = JvmSettings.RSERVE_USER.lookup(); + this.RSERVE_PWD = JvmSettings.RSERVE_PASSWORD.lookup(); + this.RSERVE_PORT = JvmSettings.RSERVE_PORT.lookup(Integer.class); + this.RSERVE_TMP_DIR = JvmSettings.RSERVE_TEMPDIR.lookup(); + + // initialization PID = RandomStringUtils.randomNumeric(6); @@ -703,15 +680,12 @@ public Map runDataFrameRequest(RJobRequest jobRequest, RConnecti public File transferRemoteFile(RConnection connection, String targetFilename, String tmpFilePrefix, String tmpFileExt, int fileSize) { - // set up a local temp file: - + // set up a local temp file: File tmpResultFile = null; - String resultFile = tmpFilePrefix + PID + "." 
+ tmpFileExt; - RFileInputStream rInStream = null; OutputStream outbr = null; try { - tmpResultFile = new File(LOCAL_TEMP_DIR, resultFile); + tmpResultFile = File.createTempFile(tmpFilePrefix + PID, "."+tmpFileExt); outbr = new BufferedOutputStream(new FileOutputStream(tmpResultFile)); // open the input stream rInStream = connection.openFile(targetFilename); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java b/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java index a7a89def449..ef37569ac54 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java @@ -111,7 +111,8 @@ private String constructDatasetQuery() { List queryStrings = new ArrayList<>(); for (DatasetFieldType dsfType : metadataFieldList) { if (dsfType.getSearchValue() != null && !dsfType.getSearchValue().equals("")) { - queryStrings.add(constructQuery(dsfType.getSolrField().getNameSearchable(), dsfType.getSearchValue())); + //CVoc fields return term URIs - add quotes around them to avoid solr breaking them into individual search words + queryStrings.add(constructQuery(dsfType.getSolrField().getNameSearchable(), dsfType.getSearchValue(), getCVocConf().containsKey(dsfType.getId()))); } else if (dsfType.getListValues() != null && !dsfType.getListValues().isEmpty()) { List listQueryStrings = new ArrayList<>(); for (String value : dsfType.getListValues()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 484e5768eb1..e73cce8acbe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.DataFileTag; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; +import 
edu.harvard.iq.dataverse.DatasetFieldCompoundValue; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; @@ -30,6 +31,7 @@ import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; @@ -37,6 +39,7 @@ import java.io.IOException; import java.io.InputStream; import java.sql.Timestamp; +import java.text.NumberFormat; import java.text.SimpleDateFormat; import java.time.LocalDate; import java.util.ArrayList; @@ -86,6 +89,8 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.BodyContentHandler; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; import org.xml.sax.ContentHandler; @Stateless @@ -93,6 +98,7 @@ public class IndexServiceBean { private static final Logger logger = Logger.getLogger(IndexServiceBean.class.getCanonicalName()); + private static final Config config = ConfigProvider.getConfig(); @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -153,13 +159,18 @@ public class IndexServiceBean { public static final String HARVESTED = "Harvested"; private String rootDataverseName; private Dataverse rootDataverseCached; - private SolrClient solrServer; + SolrClient solrServer; private VariableMetadataUtil variableMetadataUtil; @PostConstruct public void init() { - String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1"; + // Get from MPCONFIG. 
Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. + String protocol = JvmSettings.SOLR_PROT.lookup(); + String path = JvmSettings.SOLR_PATH.lookup(); + + String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path; solrServer = new HttpSolrClient.Builder(urlString).build(); rootDataverseName = findRootDataverseCached().getName(); @@ -947,6 +958,70 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Float.parseFloat(westLon)) { + minWestLon=westLon; + } + if(maxEastLon==null || Float.parseFloat(maxEastLon) < Float.parseFloat(eastLon)) { + maxEastLon=eastLon; + } + if(minSouthLat==null || Float.parseFloat(minSouthLat) > Float.parseFloat(southLat)) { + minSouthLat=southLat; + } + if(maxNorthLat==null || Float.parseFloat(maxNorthLat) < Float.parseFloat(northLat)) { + maxNorthLat=northLat; + } + //W, E, N, S + solrInputDocument.addField(SearchFields.GEOLOCATION, "ENVELOPE(" + westLon + "," + eastLon + "," + northLat + "," + southLat + ")"); + } + } + //Only one bbox per dataset + //W, E, N, S + if ((minWestLon != null || maxEastLon != null) && (maxNorthLat != null || minSouthLat != null)) { + solrInputDocument.addField(SearchFields.BOUNDING_BOX, "ENVELOPE(" + minWestLon + "," + maxEastLon + "," + maxNorthLat + "," + minSouthLat + ")"); + } + + } } for(String metadataBlockName : metadataBlocksWithValue) { @@ -1422,6 +1497,7 @@ private List findAllLinkingDataverses(DvObject dvObject){ dataset = (Dataset) dvObject; linkingDataverses = dsLinkingService.findLinkingDataverses(dataset.getId()); ancestorList = dataset.getOwner().getOwners(); + ancestorList.add(dataset.getOwner()); //to show dataset in linking dv when parent dv is linked } if(dvObject.isInstanceofDataverse()){ dv = (Dataverse) dvObject; @@ -1586,6 +1662,11 @@ private List retrieveDVOPaths(DvObject dvo) { logger.info("failed to find dataverseSegments for dataversePaths for " + 
SearchFields.SUBTREE + ": " + ex); } List dataversePaths = getDataversePathsFromSegments(dataverseSegments); + if (dataversePaths.size() > 0 && dvo.isInstanceofDataverse()) { + // removing the dataverse's own id from the paths + // fixes bug where if my parent dv was linked my dv was shown as linked to myself + dataversePaths.remove(dataversePaths.size() - 1); + } /* add linking paths */ diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java index 2e75a81ed5f..f3d5f85121d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java @@ -268,4 +268,9 @@ more targeted results for just datasets. The format is YYYY (i.e. public static final String FULL_TEXT = "_text_"; public static final String EMBARGO_END_DATE = "embargoEndDate"; + // SpatialRecursivePrefixTreeFieldType: https://solr.apache.org/guide/8_11/spatial-search.html#rpt + public static final String GEOLOCATION = "geolocation"; + // BBoxField (bounding box): https://solr.apache.org/guide/8_11/spatial-search.html#bboxfield + public static final String BOUNDING_BOX = "boundingBox"; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 9bb83c88add..2b40347828a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -355,7 +355,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused DataverseRequest dataverseRequest = new DataverseRequest(session.getUser(), httpServletRequest); List dataverses = new ArrayList<>(); dataverses.add(dataverse); - solrQueryResponse = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinal, sortField, sortOrder.toString(), 
paginationStart, onlyDataRelatedToMe, numRows, false); + solrQueryResponse = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinal, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); if (solrQueryResponse.hasError()){ logger.info(solrQueryResponse.getError()); setSolrErrorEncountered(true); @@ -363,7 +363,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // This 2nd search() is for populating the "type" ("dataverse", "dataset", "file") facets: -- L.A. // (why exactly do we need it, again?) // To get the counts we display in the types facets particulary for unselected types - SEK 08/25/2021 - solrQueryResponseAllTypes = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalAllTypes, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false); + solrQueryResponseAllTypes = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalAllTypes, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); if (solrQueryResponse.hasError()){ logger.info(solrQueryResponse.getError()); setSolrErrorEncountered(true); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index ca158198204..b87a334e938 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -100,7 +100,7 @@ public class SearchServiceBean { * @throws SearchException */ public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, String query, List filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException { - return search(dataverseRequest, dataverses, query, 
filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true); + return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true, null, null); } /** @@ -121,10 +121,24 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, String query, List filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage, boolean retrieveEntities) throws SearchException { + public SolrQueryResponse search( + DataverseRequest dataverseRequest, + List dataverses, + String query, + List filterQueries, + String sortField, String sortOrder, + int paginationStart, + boolean onlyDatatRelatedToMe, + int numResultsPerPage, + boolean retrieveEntities, + String geoPoint, + String geoRadius + ) throws SearchException { if (paginationStart < 0) { throw new IllegalArgumentException("paginationStart must be 0 or greater"); @@ -204,8 +218,12 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List queryStrings, boolean isAnd, bo return queryBuilder.toString().trim(); } - + + /** + * @return Null if supplied point is null or whitespace. + * @throws IllegalArgumentException If the lat/long is not separated by a + * comma. + * @throws NumberFormatException If the lat/long values are not numbers. 
+ */ + public static String getGeoPoint(String userSuppliedGeoPoint) throws IllegalArgumentException, NumberFormatException { + if (userSuppliedGeoPoint == null || userSuppliedGeoPoint.isBlank()) { + return null; + } + String[] parts = userSuppliedGeoPoint.split(","); + // We'll supply our own errors but Solr gives a decent one: + // "Point must be in 'lat, lon' or 'x y' format: 42.3;-71.1" + if (parts.length != 2) { + String msg = "Must contain a single comma to separate latitude and longitude."; + throw new IllegalArgumentException(msg); + } + float latitude = Float.parseFloat(parts[0]); + float longitude = Float.parseFloat(parts[1]); + return latitude + "," + longitude; + } + + /** + * @return Null if supplied radius is null or whitespace. + * @throws NumberFormatException If the radius is not a positive number. + */ + public static String getGeoRadius(String userSuppliedGeoRadius) throws NumberFormatException { + if (userSuppliedGeoRadius == null || userSuppliedGeoRadius.isBlank()) { + return null; + } + float radius = 0; + try { + radius = Float.parseFloat(userSuppliedGeoRadius); + } catch (NumberFormatException ex) { + String msg = "Non-number radius supplied."; + throw new NumberFormatException(msg); + } + if (radius <= 0) { + String msg = "The supplied radius must be greater than zero."; + throw new NumberFormatException(msg); + } + return userSuppliedGeoRadius; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java index f00ece9aacc..0dc2fe08b54 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java @@ -5,16 +5,18 @@ */ package edu.harvard.iq.dataverse.search; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; -import java.io.IOException; -import java.util.logging.Logger; +import 
org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.impl.HttpSolrClient; + import javax.annotation.PostConstruct; import javax.annotation.PreDestroy; import javax.ejb.EJB; import javax.ejb.Singleton; import javax.inject.Named; -import org.apache.solr.client.solrj.SolrClient; -import org.apache.solr.client.solrj.impl.HttpSolrClient; +import java.io.IOException; +import java.util.logging.Logger; /** * @@ -38,9 +40,13 @@ public class SolrClientService { @PostConstruct public void init() { - String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1"; - solrClient = new HttpSolrClient.Builder(urlString).build(); + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. + String protocol = JvmSettings.SOLR_PROT.lookup(); + String path = JvmSettings.SOLR_PATH.lookup(); + String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path; + solrClient = new HttpSolrClient.Builder(urlString).build(); } @PreDestroy diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index ef4422e8d89..5856004ce53 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -10,9 +10,7 @@ import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; -import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -36,9 +34,7 @@ public class SolrIndexServiceBean { private static final Logger logger = Logger.getLogger(SolrIndexServiceBean.class.getCanonicalName()); - - @EJB - SystemConfig systemConfig; + @EJB 
DvObjectServiceBean dvObjectService; @EJB diff --git a/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java index a495842e40d..587e054dc4a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java @@ -266,7 +266,9 @@ private SolrQueryResponse findHits(SavedSearch savedSearch) throws SearchExcepti paginationStart, dataRelatedToMe, numResultsPerPage, - false // do not retrieve entities + false, // do not retrieve entities + null, + null ); return solrQueryResponse; } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..ed3a161075b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -41,6 +41,32 @@ public enum JvmSettings { // GENERAL SETTINGS VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + FQDN(PREFIX, "fqdn"), + SITE_URL(PREFIX, "siteUrl"), + + // FILES SETTINGS + SCOPE_FILES(PREFIX, "files"), + FILES_DIRECTORY(SCOPE_FILES, "directory"), + + // SOLR INDEX SETTINGS + SCOPE_SOLR(PREFIX, "solr"), + SOLR_HOST(SCOPE_SOLR, "host"), + SOLR_PORT(SCOPE_SOLR, "port"), + SOLR_PROT(SCOPE_SOLR, "protocol"), + SOLR_CORE(SCOPE_SOLR, "core"), + SOLR_PATH(SCOPE_SOLR, "path"), + + // RSERVE CONNECTION + SCOPE_RSERVE(PREFIX, "rserve"), + RSERVE_HOST(SCOPE_RSERVE, "host"), + RSERVE_PORT(SCOPE_RSERVE, "port", "dataverse.ingest.rserve.port"), + RSERVE_USER(SCOPE_RSERVE, "user"), + RSERVE_PASSWORD(SCOPE_RSERVE, "password"), + RSERVE_TEMPDIR(SCOPE_RSERVE, "tempdir"), + + // API SETTINGS + SCOPE_API(PREFIX, "api"), + API_SIGNING_SECRET(SCOPE_API, "signing-secret"), ; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 102772bdcf3..d84e18d5931 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -174,7 +174,12 @@ public enum Key { * */ SearchRespectPermissionRoot, - /** Solr hostname and port, such as "localhost:8983". */ + /** + * Solr hostname and port, such as "localhost:8983". + * @deprecated New installations should not use this database setting, but use {@link JvmSettings#SOLR_HOST} + * and {@link JvmSettings#SOLR_PORT}. + */ + @Deprecated(forRemoval = true, since = "2022-12-23") SolrHostColonPort, /** Enable full-text indexing in solr up to max file size */ SolrFullTextIndexing, //true or false (default) @@ -563,11 +568,16 @@ Whether Harvesting (OAI) service is enabled /* * Allow a custom JavaScript to control values of specific fields. */ - ControlledVocabularyCustomJavaScript, + ControlledVocabularyCustomJavaScript, /** * A compound setting for disabling signup for remote Auth providers: */ - AllowRemoteAuthSignUp + AllowRemoteAuthSignUp, + /** + * The URL for the DvWebLoader tool (see github.com/gdcc/dvwebloader for details) + */ + WebloaderUrl + ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 630f192890b..800c05ae6dc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -16,9 +16,13 @@ // HINT: PGSimpleDataSource would work too, but as we use a connection pool, go with a javax.sql.ConnectionPoolDataSource // HINT: PGXADataSource is unnecessary (no distributed transactions used) and breaks ingest. 
className = "org.postgresql.ds.PGConnectionPoolDataSource", - user = "${MPCONFIG=dataverse.db.user}", + + // BEWARE: as this resource is created before defaults are read from META-INF/microprofile-config.properties, + // defaults must be provided in this Payara-proprietary manner. + user = "${MPCONFIG=dataverse.db.user:dataverse}", password = "${MPCONFIG=dataverse.db.password}", - url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host}:${MPCONFIG=dataverse.db.port}/${MPCONFIG=dataverse.db.name}", + url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host:localhost}:${MPCONFIG=dataverse.db.port:5432}/${MPCONFIG=dataverse.db.name:dataverse}?${MPCONFIG=dataverse.db.parameters:}", + // If we ever need to change these pool settings, we need to remove this class and create the resource // from web.xml. We can use MicroProfile Config in there for these values, impossible to do in the annotation. // @@ -30,18 +34,30 @@ maxPoolSize = 100, // "The number of seconds that a physical connection should remain unused in the pool before the connection is closed for a connection pool. " // Payara DataSourceDefinitionDeployer default value = 300 (seconds) - maxIdleTime = 300) -// It's possible to add additional properties like this... -// -//properties = { -// "fish.payara.log-jdbc-calls=true" -//}) -// -// ... but at this time we don't think we need any. The full list -// of properties can be found at https://docs.payara.fish/community/docs/5.2021.6/documentation/payara-server/jdbc/advanced-connection-pool-properties.html#full-list-of-properties -// -// All these properties cannot be configured via MPCONFIG as Payara doesn't support this (yet). To be enhanced. -// See also https://github.com/payara/Payara/issues/5024 + maxIdleTime = 300, + + // Set more options via MPCONFIG, including defaults where applicable. + // TODO: Future versions of Payara might support setting integer properties like pool size, + // idle times, etc in a Payara-proprietary way.
See https://github.com/payara/Payara/pull/5272 + properties = { + // The following options are documented here: + // https://docs.payara.fish/community/docs/documentation/payara-server/jdbc/advanced-connection-pool-properties.html + // VALIDATION + "fish.payara.is-connection-validation-required=${MPCONFIG=dataverse.db.is-connection-validation-required:false}", + "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method:}", + "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name:}", + "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname:}", + "fish.payara.validate-atmost-once-period-in-seconds=${MPCONFIG=dataverse.db.validate-atmost-once-period-in-seconds:0}", + // LEAK DETECTION + "fish.payara.connection-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.connection-leak-timeout-in-seconds:0}", + "fish.payara.connection-leak-reclaim=${MPCONFIG=dataverse.db.connection-leak-reclaim:false}", + "fish.payara.statement-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-leak-timeout-in-seconds:0}", + "fish.payara.statement-leak-reclaim=${MPCONFIG=dataverse.db.statement-leak-reclaim:false}", + // LOGGING, SLOWNESS, PERFORMANCE + "fish.payara.statement-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-timeout-in-seconds:-1}", + "fish.payara.slow-query-threshold-in-seconds=${MPCONFIG=dataverse.db.slow-query-threshold-in-seconds:-1}", + "fish.payara.log-jdbc-calls=${MPCONFIG=dataverse.db.log-jdbc-calls:false}" + }) public class DataSourceProducer { @Resource(lookup = "java:app/jdbc/dataverse") diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 339de904f9e..c600abfd409 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -40,6 +40,7 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import 
edu.harvard.iq.dataverse.ingest.IngestableDataChecker; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory; @@ -108,6 +109,8 @@ import java.util.Arrays; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import ucar.nc2.NetcdfFile; +import ucar.nc2.NetcdfFiles; /** * a 4.0 implementation of the DVN FileUtil; @@ -467,6 +470,11 @@ public static String determineFileType(File f, String fileName) throws IOExcepti fileType = "application/fits"; } } + + // step 3: Check if NetCDF or HDF5 + if (fileType == null) { + fileType = checkNetcdfOrHdf5(f); + } // step 3: check the mime type of this file with Jhove if (fileType == null){ @@ -669,6 +677,43 @@ private static boolean isGraphMLFile(File file) { return isGraphML; } + public static String checkNetcdfOrHdf5(File file) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(file.getAbsolutePath())) { + if (netcdfFile == null) { + // Can't open as a NetCDF or HDF5 file. + return null; + } + String type = netcdfFile.getFileTypeId(); + if (type == null) { + return null; + } + switch (type) { + case "NetCDF": + return "application/netcdf"; + case "NetCDF-4": + return "application/netcdf"; + case "HDF5": + return "application/x-hdf5"; + default: + break; + } + } catch (IOException ex) { + /** + * When an HDF4 file is passed, it won't be detected. 
Instead, we've + * seen exceptions like this: + * + * ucar.nc2.internal.iosp.hdf4.H4header makeDimension WARNING: + * **dimension length=0 for TagVGroup= *refno=124 tag= VG (1965) + * Vgroup length=28 class= Dim0.0 name= ixx using data 123 + * + * java.lang.IllegalArgumentException: Dimension length =0 must be > + * 0 + */ + return null; + } + return null; + } + // from MD5Checksum.java public static String calculateChecksum(String datafile, ChecksumType checksumType) { @@ -1392,11 +1437,8 @@ public static boolean canIngestAsTabular(String mimeType) { } public static String getFilesTempDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } - + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String filesTempDirectory = filesRootDirectory + "/temp"; if (!Files.exists(Paths.get(filesTempDirectory))) { @@ -2098,7 +2140,9 @@ public static String jsonArrayOfObjectsToCSV(JsonArray jsonArray, String... 
head JsonObject jo = (JsonObject) jv; String[] values = new String[headers.length]; for (int i = 0; i < headers.length; i++) { - values[i] = jo.get(headers[i]).toString(); + if(jo.containsKey(headers[i])) { + values[i] = jo.get(headers[i]).toString(); + } } csvSB.append("\n").append(String.join(",", values)); }); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java index d64a1f7cce1..72980c3451a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java @@ -34,8 +34,12 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti List rootDvNameAsList = Arrays.asList(BrandingUtil.getInstallationBrandName()); String datasetDisplayName = ""; - if (objectOfNotification != null && (objectOfNotification instanceof Dataset) ) { - datasetDisplayName = ((Dataset)objectOfNotification).getDisplayName(); + if (objectOfNotification != null) { + if (objectOfNotification instanceof Dataset) { + datasetDisplayName = ((Dataset) objectOfNotification).getDisplayName(); + } else if (objectOfNotification instanceof DatasetVersion) { + datasetDisplayName = ((DatasetVersion) objectOfNotification).getDataset().getDisplayName(); + } } switch (userNotification.getType()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java new file mode 100644 index 00000000000..da33fc9597e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -0,0 +1,155 @@ +package edu.harvard.iq.dataverse.util; + +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Logger; + +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; +import javax.json.JsonString; + +import edu.harvard.iq.dataverse.export.openaire.Cleanup; +import 
edu.harvard.iq.dataverse.export.openaire.FirstNames; +import edu.harvard.iq.dataverse.export.openaire.Organizations; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; + +/** + * + * @author qqmyers + * + * Adapted from earlier code in OpenAireExportUtil + * + * Implements an algorithm derived from code at DataCite to determine + * whether a name is that of a Person or Organization and, if the + * former, to pull out the given and family names. + * + * Adds parameters that can improve accuracy: + * + * * e.g. for curated repositories, allowing the code to assume that all + * Person entries are in 'family name, given name' order. + * + * * allow local configuration of specific words/phrases that will + * automatically categorize one-off cases that the algorithm would + * otherwise mis-categorize. For example, the code appears to not + * recognize names ending in "Project" as an Organization. + * + */ + +public class PersonOrOrgUtil { + + private static final Logger logger = Logger.getLogger(PersonOrOrgUtil.class.getCanonicalName()); + + static boolean assumeCommaInPersonName = false; + static List orgPhrases; + + static { + setAssumeCommaInPersonName(Boolean.parseBoolean(System.getProperty("dataverse.personOrOrg.assumeCommaInPersonName", "false"))); + setOrgPhraseArray(System.getProperty("dataverse.personOrOrg.orgPhraseArray", null)); + } + + /** + * This method tries to determine if a name belongs to a person or an + * organization and, if it is a person, what the given and family names are. The + * core algorithm is adapted from a Datacite algorithm, see + * https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 + * + * @param name + * - the name to test + * @param organizationIfTied + * - if a given name isn't found, should the name be assumed to be + * from an organization. This could be a generic true/false or + * information from some non-name aspect of the entity, e.g.
which + * field is in use, or whether a .edu email exists, etc. + * @param isPerson + * - if this is known to be a person due to other info (i.e. they + * have an ORCID). In this case the algorithm is just looking for + * given/family names. + * @return a JsonObject built from the normalized fullName, the givenName and + * familyName that were found (if any), and an isPerson flag + */ + public static JsonObject getPersonOrOrganization(String name, boolean organizationIfTied, boolean isPerson) { + name = Cleanup.normalize(name); + + String givenName = null; + String familyName = null; + + boolean isOrganization = !isPerson && Organizations.getInstance().isOrganization(name); + if (!isOrganization) { + for (String phrase : orgPhrases) { + if (name.contains(phrase)) { + isOrganization = true; + break; + } + } + } + if (name.contains(",")) { + givenName = FirstNames.getInstance().getFirstName(name); + // contributorName="familyName, givenName" + if (givenName != null && !isOrganization) { + // givenName ok + isOrganization = false; + // contributor_map.put("nameType", "Personal"); + if (!name.replaceFirst(",", "").contains(",")) { + // contributorName="familyName, givenName" (exactly one comma) + String[] fullName = name.split(", "); + givenName = fullName[1]; + familyName = fullName[0]; + } + } else if (isOrganization || organizationIfTied) { + isOrganization = true; + givenName = null; + } + + } else { + if (assumeCommaInPersonName && !isPerson) { + isOrganization = true; + } else { + givenName = FirstNames.getInstance().getFirstName(name); + + if (givenName != null && !isOrganization) { + isOrganization = false; + if (givenName.length() + 1 < name.length()) { + familyName = name.substring(givenName.length() + 1); + } + } else { + // default + if (isOrganization || organizationIfTied) { + isOrganization = true; + givenName=null; + } + } + } + } + JsonObjectBuilder job = new NullSafeJsonBuilder(); + job.add("fullName", name); + job.add("givenName", givenName); + job.add("familyName", familyName); + job.add("isPerson", !isOrganization); + return job.build(); + + } + + // Public for testing + public static void setOrgPhraseArray(String phraseArray) { +
orgPhrases = new ArrayList(); + if (!StringUtil.isEmpty(phraseArray)) { + try { + JsonArray phrases = JsonUtil.getJsonArray(phraseArray); + phrases.forEach(val -> { + JsonString strVal = (JsonString) val; + orgPhrases.add(strVal.getString()); + }); + } catch (Exception e) { + logger.warning("Could not parse Org phrase list"); + } + } + + } + + // Public for testing + public static void setAssumeCommaInPersonName(boolean assume) { + assumeCommaInPersonName = assume; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 80af2df081c..c989add6e3d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -1,18 +1,26 @@ package edu.harvard.iq.dataverse.util; import com.ocpsoft.pretty.PrettyContext; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; +import org.passay.CharacterRule; + +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.inject.Named; +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.json.JsonString; +import javax.json.JsonValue; import java.io.StringReader; import java.net.InetAddress; import java.net.UnknownHostException; @@ -23,25 +31,12 @@ import 
java.util.List; import java.util.Map; import java.util.MissingResourceException; -import java.util.Properties; +import java.util.Optional; import java.util.ResourceBundle; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.json.Json; -import javax.json.JsonArray; -import javax.json.JsonObject; -import javax.json.JsonReader; -import javax.json.JsonString; -import javax.json.JsonValue; - -import org.passay.CharacterRule; -import org.apache.commons.io.IOUtils; - /** * System-wide configuration */ @@ -61,28 +56,7 @@ public class SystemConfig { AuthenticationServiceBean authenticationService; public static final String DATAVERSE_PATH = "/dataverse/"; - - /** - * A JVM option for the advertised fully qualified domain name (hostname) of - * the Dataverse installation, such as "dataverse.example.com", which may - * differ from the hostname that the server knows itself as. - * - * The equivalent in DVN 3.x was "dvn.inetAddress". - */ - public static final String FQDN = "dataverse.fqdn"; - - /** - * A JVM option for specifying the "official" URL of the site. - * Unlike the FQDN option above, this would be a complete URL, - * with the protocol, port number etc. - */ - public static final String SITE_URL = "dataverse.siteUrl"; - - /** - * A JVM option for where files are stored on the file system. - */ - public static final String FILES_DIRECTORY = "dataverse.files.directory"; - + /** * Some installations may not want download URLs to their files to be * available in Schema.org JSON-LD output. @@ -95,12 +69,6 @@ public class SystemConfig { */ private static final String PASSWORD_RESET_TIMEOUT_IN_MINUTES = "dataverse.auth.password-reset-timeout-in-minutes"; - /** - * A common place to find the String for a sane Solr hostname:port - * combination. 
- */ - private String saneDefaultForSolrHostColonPort = "localhost:8983"; - /** * The default number of datafiles that we allow to be created through * zip file upload. @@ -109,9 +77,8 @@ public class SystemConfig { public static final long defaultZipDownloadLimit = 104857600L; // 100MB private static final int defaultMultipleUploadFilesLimit = 1000; private static final int defaultLoginSessionTimeout = 480; // = 8 hours - - private static String appVersionString = null; - private static String buildNumberString = null; + + private String buildNumber = null; private static final String JVM_TIMER_SERVER_OPTION = "dataverse.timerServer"; @@ -132,137 +99,60 @@ public String getVersion() { // candidate for being moved into some kind of an application-scoped caching // service... some CachingService @Singleton - ? (L.A. 5.8) public String getVersion(boolean withBuildNumber) { - - if (appVersionString == null) { - - // The Version Number is no longer supplied in a .properties file - so - // we can't just do - // return BundleUtil.getStringFromBundle("version.number", null, ResourceBundle.getBundle("VersionNumber", Locale.US)); - // - // Instead, we'll rely on Maven placing the version number into the - // Manifest, and getting it from there: - // (this is considered a better practice, and will also allow us - // to maintain this number in only one place - the pom.xml file) - // -- L.A. 4.0.2 - - // One would assume, that once the version is in the MANIFEST.MF, - // as Implementation-Version:, it would be possible to obtain - // said version simply as - // appVersionString = getClass().getPackage().getImplementationVersion(); - // alas - that's not working, for whatever reason. (perhaps that's - // only how it works with jar-ed packages; not with .war files). - // People on the interwebs suggest that one should instead - // open the Manifest as a resource, then extract its attributes. - // There were some complications with that too. 
Plus, relying solely - // on the MANIFEST.MF would NOT work for those of the developers who - // are using "in place deployment" (i.e., where - // Netbeans runs their builds directly from the local target - // directory, bypassing the war file deployment; and the Manifest - // is only available in the .war file). For that reason, I am - // going to rely on the pom.properties file, and use java.util.Properties - // to read it. We have to look for this file in 2 different places - // depending on whether this is a .war file deployment, or a - // developers build. (the app-level META-INF is only populated when - // a .war file is built; the "maven-archiver" directory, on the other - // hand, is only available when it's a local build deployment). - // So, long story short, I'm resorting to the convoluted steps below. - // It may look hacky, but it should actually be pretty solid and - // reliable. - - - // First, find the absolute path url of the application persistence file - // always supplied with the Dataverse app: - java.net.URL fileUrl = Thread.currentThread().getContextClassLoader().getResource("META-INF/persistence.xml"); - String filePath = null; - - - if (fileUrl != null) { - filePath = fileUrl.getFile(); - if (filePath != null) { - InputStream mavenPropertiesInputStream = null; - String mavenPropertiesFilePath; - Properties mavenProperties = new Properties(); - - - filePath = filePath.replaceFirst("/[^/]*$", "/"); - // Using a relative path, find the location of the maven pom.properties file. - // First, try to look for it in the app-level META-INF. This will only be - // available if it's a war file deployment: - mavenPropertiesFilePath = filePath.concat("../../../META-INF/maven/edu.harvard.iq/dataverse/pom.properties"); - - try { - mavenPropertiesInputStream = new FileInputStream(mavenPropertiesFilePath); - } catch (IOException ioex) { - // OK, let's hope this is a local dev. build. 
- // In that case the properties file should be available in - // the maven-archiver directory: - - mavenPropertiesFilePath = filePath.concat("../../../../maven-archiver/pom.properties"); - - // try again: - - try { - mavenPropertiesInputStream = new FileInputStream(mavenPropertiesFilePath); - } catch (IOException ioex2) { - logger.warning("Failed to find and/or open for reading the pom.properties file."); - mavenPropertiesInputStream = null; - } - } - - if (mavenPropertiesInputStream != null) { - try { - mavenProperties.load(mavenPropertiesInputStream); - appVersionString = mavenProperties.getProperty("version"); - } catch (IOException ioex) { - logger.warning("caught IOException trying to read and parse the pom properties file."); - } finally { - IOUtils.closeQuietly(mavenPropertiesInputStream); - } - } - - } else { - logger.warning("Null file path representation of the location of persistence.xml in the webapp root directory!"); - } - } else { - logger.warning("Could not find the location of persistence.xml in the webapp root directory!"); - } - - - if (appVersionString == null) { - // still null? - defaulting to 4.0: - appVersionString = "4.0"; - } - } + // Retrieve the version via MPCONFIG + // NOTE: You may override the version via all methods of MPCONFIG. + // It will default to read from microprofile-config.properties source, + // which contains in the source a Maven property reference to ${project.version}. + // When packaging the app to deploy it, Maven will replace this, rendering it a static entry. 
+ String appVersion = JvmSettings.VERSION.lookup(); if (withBuildNumber) { - if (buildNumberString == null) { - // (build number is still in a .properties file in the source tree; it only - // contains a real build number if this war file was built by - // Jenkins) - + if (buildNumber == null) { + // (build number is still in a .properties file in the source tree; it only + // contains a real build number if this war file was built by Jenkins) + // TODO: might be replaced with same trick as for version via Maven property w/ empty default try { - buildNumberString = ResourceBundle.getBundle("BuildNumber").getString("build.number"); + buildNumber = ResourceBundle.getBundle("BuildNumber").getString("build.number"); } catch (MissingResourceException ex) { - buildNumberString = null; + buildNumber = null; + } + + // Also try to read the build number via MicroProfile Config if not already present from the + // properties file (so can be overridden by env var or other source) + if (buildNumber == null || buildNumber.isEmpty()) { + buildNumber = JvmSettings.BUILD.lookupOptional().orElse(""); } } - if (buildNumberString != null && !buildNumberString.equals("")) { - return appVersionString + " build " + buildNumberString; - } - } + if (!buildNumber.equals("")) { + return appVersion + " build " + buildNumber; + } + } - return appVersionString; + return appVersion; } - + + /** + * Retrieve the Solr endpoint in "host:port" form, to be used with a Solr client. + * + * This will retrieve the setting from either the database ({@link SettingsServiceBean.Key#SolrHostColonPort}) or + * via Microprofile Config API (properties {@link JvmSettings#SOLR_HOST} and {@link JvmSettings#SOLR_PORT}). + * + * A database setting always takes precedence. If not given via other config sources, a default from + * resources/META-INF/microprofile-config.properties is used. (It's possible to use profiles.) 
+ * + * @return Solr endpoint as string "hostname:port" + */ public String getSolrHostColonPort() { - String SolrHost; - if ( System.getenv("SOLR_SERVICE_HOST") != null && System.getenv("SOLR_SERVICE_HOST") != ""){ - SolrHost = System.getenv("SOLR_SERVICE_HOST"); - } - else SolrHost = saneDefaultForSolrHostColonPort; - String solrHostColonPort = settingsService.getValueForKey(SettingsServiceBean.Key.SolrHostColonPort, SolrHost); - return solrHostColonPort; + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. + // NOTE: containers should use system property mp.config.profile=ct to use sane container usage default + String host = JvmSettings.SOLR_HOST.lookup(); + String port = JvmSettings.SOLR_PORT.lookup(); + + // DB setting takes precedence over all. If not present, will return default from above. + return Optional.ofNullable(settingsService.getValueForKey(SettingsServiceBean.Key.SolrHostColonPort)) + .orElse(host + ":" + port); } public boolean isProvCollectionEnabled() { @@ -340,32 +230,58 @@ public static int getMinutesUntilPasswordResetTokenExpires() { } /** - * The "official", designated URL of the site; - * can be defined as a complete URL; or derived from the - * "official" hostname. If none of these options is set, - * defaults to the InetAddress.getLocalHOst() and https; - * These are legacy JVM options. Will be eventualy replaced - * by the Settings Service configuration. + * Lookup (or construct) the designated URL of this instance from configuration. + * + * Can be defined as a complete URL via dataverse.siteUrl; or derived from the hostname + * dataverse.fqdn and HTTPS. If none of these options is set, defaults to the + * {@link InetAddress#getLocalHost} and HTTPS. + * + * NOTE: This method does not provide any validation. 
+ * TODO: The behaviour of this method is subject to a later change, see + * https://github.com/IQSS/dataverse/issues/6636 + * + * @return The designated URL of this instance as per configuration. */ public String getDataverseSiteUrl() { return getDataverseSiteUrlStatic(); } + /** + * Lookup (or construct) the designated URL of this instance from configuration. + * + * Can be defined as a complete URL via dataverse.siteUrl; or derived from the hostname + * dataverse.fqdn and HTTPS. If none of these options is set, defaults to the + * {@link InetAddress#getLocalHost} and HTTPS. + * + * NOTE: This method does not provide any validation. + * TODO: The behaviour of this method is subject to a later change, see + * https://github.com/IQSS/dataverse/issues/6636 + * + * @return The designated URL of this instance as per configuration. + */ public static String getDataverseSiteUrlStatic() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; + // If dataverse.siteUrl has been configured, simply return it + Optional siteUrl = JvmSettings.SITE_URL.lookupOptional(); + if (siteUrl.isPresent()) { + return siteUrl.get(); } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } + + // Otherwise try to lookup dataverse.fqdn setting and default to HTTPS + Optional fqdn = JvmSettings.FQDN.lookupOptional(); + if (fqdn.isPresent()) { + return "https://" + fqdn.get(); + } + + // Last resort - get the servers local name and use it. + // BEWARE - this is dangerous. + // 1) A server might have a different name than your repository URL. + // 2) The underlying reverse DNS lookup might point to a different name than your repository URL. + // 3) If this server has multiple IPs assigned, which one will it be for the lookup? 
+ try { + return "https://" + InetAddress.getLocalHost().getCanonicalHostName(); + } catch (UnknownHostException e) { + return null; } - hostUrl = "https://" + hostName; - return hostUrl; } /** @@ -375,22 +291,6 @@ public String getPageURLWithQueryString() { return PrettyContext.getCurrentInstance().getRequestURL().toURL() + PrettyContext.getCurrentInstance().getRequestQueryString().toQueryString(); } - /** - * The "official" server's fully-qualified domain name: - */ - public String getDataverseServer() { - // still reliese on a JVM option: - String fqdn = System.getProperty(FQDN); - if (fqdn == null) { - try { - fqdn = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - return fqdn; - } - public String getGuidesBaseUrl() { String saneDefault = "https://guides.dataverse.org"; String guidesBaseUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GuidesBaseUrl, saneDefault); @@ -862,7 +762,13 @@ public enum FileUploadMethods { * Upload through Globus of large files */ - GLOBUS("globus") + GLOBUS("globus"), + + /** + * Upload folders of files through dvwebloader app + */ + + WEBLOADER("dvwebloader"); ; @@ -999,6 +905,10 @@ public boolean isRsyncUpload(){ public boolean isGlobusUpload(){ return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), true); } + + public boolean isWebloaderUpload(){ + return getMethodAvailable(FileUploadMethods.WEBLOADER.toString(), true); + } // Controls if HTTP upload is enabled for both GUI and API. 
public boolean isHTTPUpload(){ diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index b3d5f9d6b74..4acf2d544e8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -5,6 +5,9 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.json.Json; +import javax.json.JsonValue; + import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.FileMetadata; @@ -95,12 +98,17 @@ public ApiToken getApiToken() { public String getLocaleCode() { return localeCode; } - - public String getQueryParam(String key, String value) { + + public JsonValue getParam(String value) { String tokenValue = null; tokenValue = getTokenValue(value); - if (tokenValue != null) { - return key + '=' + tokenValue; + if (tokenValue != null && !tokenValue.isBlank()) { + try{ + int x =Integer.parseInt(tokenValue); + return Json.createValue(x); + } catch (NumberFormatException nfe){ + return Json.createValue(tokenValue); + } } else { return null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index b11334520e6..29c4e8a6fb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.util; +import java.net.MalformedURLException; import java.net.URL; import java.nio.charset.Charset; import java.util.List; @@ -19,6 +20,10 @@ public class UrlSignerUtil { private static final Logger logger = Logger.getLogger(UrlSignerUtil.class.getName()); + public static final String SIGNED_URL_TOKEN="token"; + public static final String SIGNED_URL_METHOD="method"; + public static final String SIGNED_URL_USER="user"; + public static final String 
SIGNED_URL_UNTIL="until"; /** * * @param baseUrl - the URL to sign - cannot contain query params @@ -34,7 +39,7 @@ public class UrlSignerUtil { * @return - the signed URL */ public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { - StringBuilder signedUrl = new StringBuilder(baseUrl); + StringBuilder signedUrlBuilder = new StringBuilder(baseUrl); boolean firstParam = true; if (baseUrl.contains("?")) { @@ -44,33 +49,33 @@ public static String signUrl(String baseUrl, Integer timeout, String user, Strin LocalDateTime validTime = LocalDateTime.now(); validTime = validTime.plusMinutes(timeout); validTime.toString(); - signedUrl.append(firstParam ? "?" : "&").append("until=").append(validTime); + signedUrlBuilder.append(firstParam ? "?" : "&").append(SIGNED_URL_UNTIL + "=").append(validTime); firstParam = false; } if (user != null) { - signedUrl.append(firstParam ? "?" : "&").append("user=").append(user); + signedUrlBuilder.append(firstParam ? "?" : "&").append(SIGNED_URL_USER + "=").append(user); firstParam = false; } if (method != null) { - signedUrl.append(firstParam ? "?" : "&").append("method=").append(method); + signedUrlBuilder.append(firstParam ? "?" : "&").append(SIGNED_URL_METHOD + "=").append(method); firstParam=false; } - signedUrl.append(firstParam ? "?" : "&").append("token="); - logger.fine("String to sign: " + signedUrl.toString() + ""); - signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); - logger.fine("Generated Signed URL: " + signedUrl.toString()); + signedUrlBuilder.append(firstParam ? "?" : "&").append(SIGNED_URL_TOKEN + "="); + logger.fine("String to sign: " + signedUrlBuilder.toString() + ""); + String signedUrl = signedUrlBuilder.toString(); + signedUrl= signedUrl + (DigestUtils.sha512Hex(signedUrl + key)); if (logger.isLoggable(Level.FINE)) { logger.fine( - "URL signature is " + (isValidUrl(signedUrl.toString(), user, method, key) ? 
"valid" : "invalid")); + "URL signature is " + (isValidUrl(signedUrl, user, method, key) ? "valid" : "invalid")); } - return signedUrl.toString(); + return signedUrl; } /** * This method will only return true if the URL and parameters except the * "token" are unchanged from the original/match the values sent to this method, * and the "token" parameter matches what this method recalculates using the - * shared key THe method also assures that the "until" timestamp is after the + * shared key. The method also assures that the "until" timestamp is after the * current time. * * @param signedUrl - the signed URL as received from Dataverse @@ -97,19 +102,19 @@ public static boolean isValidUrl(String signedUrl, String user, String method, S String allowedMethod = null; String allowedUser = null; for (NameValuePair nvp : params) { - if (nvp.getName().equals("token")) { + if (nvp.getName().equals(SIGNED_URL_TOKEN)) { hash = nvp.getValue(); logger.fine("Hash: " + hash); } - if (nvp.getName().equals("until")) { + if (nvp.getName().equals(SIGNED_URL_UNTIL)) { dateString = nvp.getValue(); logger.fine("Until: " + dateString); } - if (nvp.getName().equals("method")) { + if (nvp.getName().equals(SIGNED_URL_METHOD)) { allowedMethod = nvp.getValue(); logger.fine("Method: " + allowedMethod); } - if (nvp.getName().equals("user")) { + if (nvp.getName().equals(SIGNED_URL_USER)) { allowedUser = nvp.getValue(); logger.fine("User: " + allowedUser); } @@ -148,4 +153,18 @@ public static boolean isValidUrl(String signedUrl, String user, String method, S return valid; } + public static boolean hasToken(String urlString) { + try { + URL url = new URL(urlString); + List params = URLEncodedUtils.parse(url.getQuery(), Charset.forName("UTF-8")); + for (NameValuePair nvp : params) { + if (nvp.getName().equals(SIGNED_URL_TOKEN)) { + return true; + } + } + } catch (MalformedURLException mue) { + logger.fine("Bad url string: " + urlString); + } + return false; + } } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java new file mode 100644 index 00000000000..c2d9bf67236 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java @@ -0,0 +1,36 @@ +package edu.harvard.iq.dataverse.util; + +import java.util.Date; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map.Entry; +import java.util.logging.Logger; + +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpSession; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetPage; +import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; + +public class WebloaderUtil { + + private static final Logger logger = Logger.getLogger(WebloaderUtil.class.getCanonicalName()); + + /** + * Create the URL required to launch https://github.com/gdcc/dvwebloader + */ + public static String getWebloaderUrl(Dataset d, ApiToken apiToken, String localeCode, String baseUrl) { + // Use URLTokenUtil for params currently in common with external tools. 
+ URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); + String appUrl; + appUrl = baseUrl + + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + return tokenUtil.replaceTokensWithValues(appUrl); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 4ecdc73ae6e..22e2c6c8d78 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -902,12 +902,13 @@ public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingC String dataverseAlias = obj.getString("dataverseAlias",null); harvestingClient.setName(obj.getString("nickName",null)); - harvestingClient.setHarvestType(obj.getString("type",null)); + harvestingClient.setHarvestStyle(obj.getString("style", "default")); harvestingClient.setHarvestingUrl(obj.getString("harvestUrl",null)); harvestingClient.setArchiveUrl(obj.getString("archiveUrl",null)); - harvestingClient.setArchiveDescription(obj.getString("archiveDescription")); + harvestingClient.setArchiveDescription(obj.getString("archiveDescription", null)); harvestingClient.setMetadataPrefix(obj.getString("metadataFormat",null)); harvestingClient.setHarvestingSet(obj.getString("set",null)); + harvestingClient.setCustomHttpHeaders(obj.getString("customHeaders", null)); return dataverseAlias; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index e088122419d..9f5401f77d1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -37,6 +37,7 @@ import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; import 
edu.harvard.iq.dataverse.globus.FileDetailsHolder; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.DatasetFieldWalker; @@ -550,6 +551,17 @@ public static JsonObjectBuilder json(DatasetFieldType fld) { fieldsBld.add("type", fld.getFieldType().toString()); fieldsBld.add("watermark", fld.getWatermark()); fieldsBld.add("description", fld.getDescription()); + fieldsBld.add("multiple", fld.isAllowMultiples()); + fieldsBld.add("isControlledVocabulary", fld.isControlledVocabulary()); + if (fld.isControlledVocabulary()) { + // If the field has a controlled vocabulary, + // add all values to the resulting JSON + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (ControlledVocabularyValue cvv : fld.getControlledVocabularyValues()) { + jab.add(cvv.getStrValue()); + } + fieldsBld.add("controlledVocabularyValues", jab); + } if (!fld.getChildDatasetFieldTypes().isEmpty()) { JsonObjectBuilder subFieldsBld = jsonObjectBuilder(); for (DatasetFieldType subFld : fld.getChildDatasetFieldTypes()) { @@ -568,6 +580,7 @@ public static JsonObjectBuilder json(FileMetadata fmd) { // in a sense that there's no longer the category field in the // fileMetadata object; but there are now multiple, oneToMany file // categories - and we probably need to export them too!) -- L.A. 4.5 + // DONE: categories by name .add("description", fmd.getDescription()) .add("label", fmd.getLabel()) // "label" is the filename .add("restricted", fmd.isRestricted()) @@ -605,13 +618,13 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { // (TODO...? L.A.
4.5, Aug 7 2016) String fileName = null; - if (fileMetadata != null) { - fileName = fileMetadata.getLabel(); - } else if (df.getFileMetadata() != null) { + if (fileMetadata == null){ // Note that this may not necessarily grab the file metadata from the // version *you want*! (L.A.) - fileName = df.getFileMetadata().getLabel(); + fileMetadata = df.getFileMetadata(); } + + fileName = fileMetadata.getLabel(); String pidURL = ""; @@ -628,7 +641,8 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { .add("filename", fileName) .add("contentType", df.getContentType()) .add("filesize", df.getFilesize()) - .add("description", df.getDescription()) + .add("description", fileMetadata.getDescription()) + .add("categories", getFileCategories(fileMetadata)) .add("embargo", embargo) //.add("released", df.isReleased()) //.add("restricted", df.isRestricted()) @@ -655,6 +669,32 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { ; } + public static JsonObjectBuilder json(HarvestingClient harvestingClient) { + if (harvestingClient == null) { + return null; + } + + return jsonObjectBuilder().add("nickName", harvestingClient.getName()). + add("dataverseAlias", harvestingClient.getDataverse().getAlias()). + add("type", harvestingClient.getHarvestType()). + add("style", harvestingClient.getHarvestStyle()). + add("harvestUrl", harvestingClient.getHarvestingUrl()). + add("archiveUrl", harvestingClient.getArchiveUrl()). + add("archiveDescription", harvestingClient.getArchiveDescription()). + add("metadataFormat", harvestingClient.getMetadataPrefix()). + add("set", harvestingClient.getHarvestingSet()). + add("schedule", harvestingClient.isScheduled() ? harvestingClient.getScheduleDescription() : "none"). + add("status", harvestingClient.isHarvestingNow() ? "inProgress" : "inActive"). + add("customHeaders", harvestingClient.getCustomHttpHeaders()). + add("lastHarvest", harvestingClient.getLastHarvestTime() == null ? 
null : harvestingClient.getLastHarvestTime().toString()). + add("lastResult", harvestingClient.getLastResult()). + add("lastSuccessful", harvestingClient.getLastSuccessfulHarvestTime() == null ? null : harvestingClient.getLastSuccessfulHarvestTime().toString()). + add("lastNonEmpty", harvestingClient.getLastNonEmptyHarvestTime() == null ? null : harvestingClient.getLastNonEmptyHarvestTime().toString()). + add("lastDatasetsHarvested", harvestingClient.getLastHarvestedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastHarvestedDatasetCount().toString()). + add("lastDatasetsDeleted", harvestingClient.getLastDeletedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastDeletedDatasetCount().toString()). + add("lastDatasetsFailed", harvestingClient.getLastFailedDatasetCount()); // == null ? "N/A" : harvestingClient.getLastFailedDatasetCount().toString()); + } + public static String format(Date d) { return (d == null) ? null : Util.getDateTimeFormat().format(d); } @@ -691,7 +731,7 @@ public static JsonArrayBuilder getTabularFileTags(DataFile df) { } return tabularTags; } - + private static class DatasetFieldsToJson implements DatasetFieldWalker.Listener { Deque objectStack = new LinkedList<>(); @@ -833,7 +873,8 @@ public static JsonObjectBuilder json(License license) { .add("uri", license.getUri().toString()) .add("iconUrl", license.getIconUrl() == null ? 
null : license.getIconUrl().toString()) .add("active", license.isActive()) - .add("isDefault", license.isDefault()); + .add("isDefault", license.isDefault()) + .add("sortOrder", license.getSortOrder()); } public static Collector stringsToJsonArray() { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index f4a3c635f8b..d02099eddb5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -57,10 +57,16 @@ public static String prettyPrint(javax.json.JsonObject jsonObject) { } return stringWriter.toString(); } - + public static javax.json.JsonObject getJsonObject(String serializedJson) { try (StringReader rdr = new StringReader(serializedJson)) { return Json.createReader(rdr).readObject(); } } + + public static javax.json.JsonArray getJsonArray(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + return Json.createReader(rdr).readArray(); + } + } } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index b19e80020ba..45807dc7cde 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -73,7 +73,7 @@ delete=Delete copyClipboard=Copy to Clipboard truncateMoreBtn=Read full {0} [+] truncateMoreTip=Click to read the full {0}. -truncateLessBtn=Collapse {0} [+] +truncateLessBtn=Collapse {0} [-] truncateLessTip=Click to collapse the {0}. yes=Yes no=No @@ -232,12 +232,12 @@ notification.access.revoked.datafile=You have been removed from a role in {0}. notification.checksumfail=One or more files in your upload failed checksum validation for dataset {1}. Please re-run the upload script. If the problem persists, please contact support. notification.ingest.completed=Your Dataset {2} has one or more tabular files that completed the tabular ingest process. 
These files will be available for download in their original formats and other formats for enhanced archival purposes after you publish the dataset. The archival .tab files are displayed in the file table. Please see the guides for more information about ingest and support for tabular files. notification.ingest.completedwitherrors=Your Dataset {2} has one or more tabular files that have been uploaded successfully but are not supported for tabular ingest. After you publish the dataset, these files will not have additional archival features. Please see the guides for more information about ingest and support for tabular files.

Files with incomplete ingest:{5} -notification.mail.import.filesystem=Globus transfer to Dataset {2} ({0}/dataset.xhtml?persistentId={1}) was successful. File(s) have been uploaded and verified. +notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully uploaded and verified. notification.mail.globus.upload.completed=Globus transfer to Dataset {2} was successful. File(s) have been uploaded and verified.

{3}
notification.mail.globus.download.completed=Globus transfer of file(s) from the dataset {2} was successful.

{3}
notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} is complete with errors.

{3}
notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} is complete with errors.

{3}
-notification.import.filesystem=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. +notification.import.filesystem=Dataset {1} has been successfully uploaded and verified. notification.globus.upload.completed=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. notification.globus.download.completed=Globus transfer from the dataset {1} was successful. notification.globus.upload.completedWithErrors=Globus transfer to Dataset {1} is complete with errors. @@ -520,7 +520,7 @@ harvestclients.btn.add=Add Client harvestclients.tab.header.name=Nickname harvestclients.tab.header.url=URL harvestclients.tab.header.lastrun=Last Run -harvestclients.tab.header.lastresults=Last Results +harvestclients.tab.header.lastresults=Last Result harvestclients.tab.header.action=Actions harvestclients.tab.header.action.btn.run=Run Harvesting harvestclients.tab.header.action.btn.edit=Edit @@ -538,6 +538,10 @@ harvestclients.newClientDialog.nickname.helptext=Consists of letters, digits, un harvestclients.newClientDialog.nickname.required=Client nickname cannot be empty! harvestclients.newClientDialog.nickname.invalid=Client nickname can contain only letters, digits, underscores (_) and dashes (-); and must be at most 30 characters. harvestclients.newClientDialog.nickname.alreadyused=This nickname is already used. +harvestclients.newClientDialog.customHeader=Custom HTTP Header +harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to requests, if required by this OAI server. 
+harvestclients.newClientDialog.customHeader.watermark=Enter an http header, as in header-name: header-value +harvestclients.newClientDialog.customHeader.invalid=Client header name can only contain letters, digits, underscores (_) and dashes (-); the entire header string must be in the form of "header-name: header-value" harvestclients.newClientDialog.type=Server Protocol harvestclients.newClientDialog.type.helptext=Only the OAI server protocol is currently supported. harvestclients.newClientDialog.type.OAI=OAI @@ -1523,7 +1527,7 @@ dataset.subjectDisplay.title=Subject dataset.contact.tip=Use email button above to contact. dataset.asterisk.tip=Asterisks indicate required fields dataset.message.uploadFiles.label=Upload Dataset Files -dataset.message.uploadFilesSingle.message=All file types are supported for upload and download in their original format. If you are uploading Excel, CSV, TSV, RData, Stata, or SPSS files, see the guides for tabular support and limitations. +dataset.message.uploadFilesSingle.message=All file types are supported for upload and download in their original format. If you are uploading Excel, CSV, TSV, RData, Stata, or SPSS files, see the guides for tabular support and limitations. dataset.message.uploadFilesMultiple.message=Multiple file upload/download methods are available for this dataset. Once you upload a file using one of these methods, your choice will be locked in for this dataset. dataset.message.editMetadata.label=Edit Dataset Metadata dataset.message.editMetadata.message=Add more metadata about this dataset to help others easily find it. @@ -1673,6 +1677,10 @@ file.finishGlobus=Globus Transfer has finished file.downloadFromGlobus=Download through Globus file.globus.transfer=Globus Transfer file.globus.of=of: +file.fromWebloader.tip=Upload a folder of files. This method retains the relative path structure from your local machine. (Using it will cancel any other types of uploads in progress on this page.) 
+file.fromWebloaderAfterCreate.tip=An option to upload a folder of files will be enabled after this dataset is created. +file.fromWebloader=Upload a Folder + file.api.httpDisabled=File upload via HTTP is not available for this installation of Dataverse. file.api.alreadyHasPackageFile=File upload via HTTP disabled since this dataset already contains a package file. file.replace.original=Original File @@ -2007,6 +2015,7 @@ file.remotelyStored=This file is stored remotely - click for more info file.auxfiles.download.header=Download Auxiliary Files # These types correspond to the AuxiliaryFile.Type enum. file.auxfiles.types.DP=Differentially Private Statistics +file.auxfiles.types.NcML=XML from NetCDF/HDF5 (NcML) # Add more types here file.auxfiles.unspecifiedTypes=Other Auxiliary Files @@ -2547,6 +2556,7 @@ admin.api.deleteUser.success=Authenticated User {0} deleted. #Files.java files.api.metadata.update.duplicateFile=Filename already exists at {0} +files.api.no.draft=No draft available for this file #Datasets.java datasets.api.updatePIDMetadata.failure.dataset.must.be.released=Modify Registration Metadata must be run on a published dataset. diff --git a/src/main/java/propertyFiles/License.properties b/src/main/java/propertyFiles/License.properties new file mode 100644 index 00000000000..2347fed9db6 --- /dev/null +++ b/src/main/java/propertyFiles/License.properties @@ -0,0 +1,4 @@ +license.cc0_1.0.description=Creative Commons CC0 1.0 Universal Public Domain Dedication. +license.cc_by_4.0.description=Creative Commons Attribution 4.0 International License. 
+license.cc0_1.0.name=CC0 1.0 +license.cc_by_4.0.name=CC-BY 4.0 diff --git a/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties b/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties index c12dd8f1b33..e5f8942f3ae 100644 --- a/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties +++ b/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties @@ -3,6 +3,7 @@ ado=application/x-stata-ado dbf=application/dbf dcm=application/dicom docx=application/vnd.openxmlformats-officedocument.wordprocessingml.document +eln=application/vnd.eln+zip emf=application/x-emf geojson=application/geo+json h5=application/x-h5 diff --git a/src/main/java/propertyFiles/MimeTypeDisplay.properties b/src/main/java/propertyFiles/MimeTypeDisplay.properties index 928419c0405..295ac226fa1 100644 --- a/src/main/java/propertyFiles/MimeTypeDisplay.properties +++ b/src/main/java/propertyFiles/MimeTypeDisplay.properties @@ -169,6 +169,7 @@ application/x-7z-compressed=7Z Archive application/x-xz=XZ Archive application/warc=Web Archive application/x-iso9660-image=Optical Disc Image +application/vnd.eln+zip=ELN Archive # Image image/gif=GIF Image image/jpeg=JPEG Image diff --git a/src/main/java/propertyFiles/MimeTypeFacets.properties b/src/main/java/propertyFiles/MimeTypeFacets.properties index 2cac63a7ad0..aaab66f20ae 100644 --- a/src/main/java/propertyFiles/MimeTypeFacets.properties +++ b/src/main/java/propertyFiles/MimeTypeFacets.properties @@ -170,6 +170,7 @@ application/x-7z-compressed=Archive application/x-xz=Archive application/warc=Archive application/x-iso9660-image=Archive +application/vnd.eln+zip=Archive # Image image/gif=Image image/jpeg=Image @@ -224,4 +225,4 @@ text/xml-graphml=Network Data # Other application/octet-stream=Unknown # Dataverse-specific -application/vnd.dataverse.file-package=Data \ No newline at end of file +application/vnd.dataverse.file-package=Data diff --git 
a/src/main/java/propertyFiles/astrophysics.properties b/src/main/java/propertyFiles/astrophysics.properties index a49b8b66510..6e04bac590f 100644 --- a/src/main/java/propertyFiles/astrophysics.properties +++ b/src/main/java/propertyFiles/astrophysics.properties @@ -50,9 +50,9 @@ datasetfieldtype.coverage.SkyFraction.description=The fraction of the sky repres datasetfieldtype.coverage.Polarization.description=The polarization coverage datasetfieldtype.redshiftType.description=RedshiftType string C "Redshift"; or "Optical" or "Radio" definitions of Doppler velocity used in the data object. datasetfieldtype.resolution.Redshift.description=The resolution in redshift (unitless) or Doppler velocity (km/s) in the data object. -datasetfieldtype.coverage.RedshiftValue.description=The value of the redshift (unitless) or Doppler velocity (km/s in the data object. -datasetfieldtype.coverage.Redshift.MinimumValue.description=The minimum value of the redshift (unitless) or Doppler velocity (km/s in the data object. -datasetfieldtype.coverage.Redshift.MaximumValue.description=The maximum value of the redshift (unitless) or Doppler velocity (km/s in the data object. +datasetfieldtype.coverage.RedshiftValue.description=The value of the redshift (unitless) or Doppler velocity (km/s) in the data object. +datasetfieldtype.coverage.Redshift.MinimumValue.description=The minimum value of the redshift (unitless) or Doppler velocity (km/s) in the data object. +datasetfieldtype.coverage.Redshift.MaximumValue.description=The maximum value of the redshift (unitless) or Doppler velocity (km/s) in the data object. 
datasetfieldtype.astroType.watermark= datasetfieldtype.astroFacility.watermark= datasetfieldtype.astroInstrument.watermark= @@ -102,4 +102,4 @@ controlledvocabulary.astroType.observation=Observation controlledvocabulary.astroType.object=Object controlledvocabulary.astroType.value=Value controlledvocabulary.astroType.valuepair=ValuePair -controlledvocabulary.astroType.survey=Survey \ No newline at end of file +controlledvocabulary.astroType.survey=Survey diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index 668542c92be..f35ede79b50 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -251,6 +251,7 @@ controlledvocabulary.subject.social_sciences=Social Sciences controlledvocabulary.subject.other=Other controlledvocabulary.publicationIDType.ark=ark controlledvocabulary.publicationIDType.arxiv=arXiv +controlledvocabulary.publicationIDType.cstr=cstr controlledvocabulary.publicationIDType.bibcode=bibcode controlledvocabulary.publicationIDType.doi=doi controlledvocabulary.publicationIDType.ean13=ean13 @@ -345,7 +346,7 @@ controlledvocabulary.language.galician=Galician controlledvocabulary.language.georgian=Georgian controlledvocabulary.language.german=German controlledvocabulary.language.greek_(modern)=Greek (modern) -controlledvocabulary.language.guarani=Guaraní +controlledvocabulary.language.guarani=Guaraní controlledvocabulary.language.gujarati=Gujarati controlledvocabulary.language.haitian,_haitian_creole=Haitian, Haitian Creole controlledvocabulary.language.hausa=Hausa @@ -405,7 +406,7 @@ controlledvocabulary.language.navajo,_navaho=Navajo, Navaho controlledvocabulary.language.northern_ndebele=Northern Ndebele controlledvocabulary.language.nepali=Nepali controlledvocabulary.language.ndonga=Ndonga -controlledvocabulary.language.norwegian_bokmal=Norwegian Bokmål +controlledvocabulary.language.norwegian_bokmal=Norwegian Bokmål
controlledvocabulary.language.norwegian_nynorsk=Norwegian Nynorsk controlledvocabulary.language.norwegian=Norwegian controlledvocabulary.language.nuosu=Nuosu @@ -467,7 +468,7 @@ controlledvocabulary.language.urdu=Urdu controlledvocabulary.language.uzbek=Uzbek controlledvocabulary.language.venda=Venda controlledvocabulary.language.vietnamese=Vietnamese -controlledvocabulary.language.volapuk=Volapük +controlledvocabulary.language.volapuk=Volapük controlledvocabulary.language.walloon=Walloon controlledvocabulary.language.welsh=Welsh controlledvocabulary.language.wolof=Wolof @@ -477,4 +478,4 @@ controlledvocabulary.language.yiddish=Yiddish controlledvocabulary.language.yoruba=Yoruba controlledvocabulary.language.zhuang,_chuang=Zhuang, Chuang controlledvocabulary.language.zulu=Zulu -controlledvocabulary.language.not_applicable=Not applicable \ No newline at end of file +controlledvocabulary.language.not_applicable=Not applicable diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties new file mode 100644 index 00000000000..c0e7eac6d4a --- /dev/null +++ b/src/main/java/propertyFiles/codeMeta20.properties @@ -0,0 +1,85 @@ +metadatablock.name=codeMeta20 +metadatablock.displayName=Software Metadata (CodeMeta 2.0) +datasetfieldtype.codeVersion.title=Software Version +datasetfieldtype.codeVersion.description=Version of the software instance, usually following some convention like SemVer etc. +datasetfieldtype.codeVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc +datasetfieldtype.developmentStatus.title=Development Status +datasetfieldtype.developmentStatus.description=Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information.
+datasetfieldtype.developmentStatus.watermark= Development Status +datasetfieldtype.codeRepository.title=Code Repository +datasetfieldtype.codeRepository.description=Link to the repository where the un-compiled, human-readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). +datasetfieldtype.codeRepository.watermark=e.g. https://github.com/user/project +datasetfieldtype.applicationCategory.title=Application Category +datasetfieldtype.applicationCategory.description=Type of software application, e.g. Simulation, Analysis, Visualisation. +datasetfieldtype.applicationCategory.watermark= +datasetfieldtype.applicationSubCategory.title=Application Subcategory +datasetfieldtype.applicationSubCategory.description=Subcategory of the application, e.g. Arcade Game. +datasetfieldtype.applicationSubCategory.watermark= +datasetfieldtype.programmingLanguage.title=Programming Language +datasetfieldtype.programmingLanguage.description=The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) +datasetfieldtype.programmingLanguage.watermark= +datasetfieldtype.runtimePlatform.title=Runtime Platform +datasetfieldtype.runtimePlatform.description=Runtime platform or script interpreter dependencies (e.g. Java 11, Python 3.10 or .Net Framework 4.8). +datasetfieldtype.runtimePlatform.watermark=e.g. Python 3.10 +datasetfieldtype.operatingSystem.title=Operating Systems +datasetfieldtype.operatingSystem.description=Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). +datasetfieldtype.operatingSystem.watermark= +datasetfieldtype.targetProduct.title=Target Product +datasetfieldtype.targetProduct.description=Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. 
+datasetfieldtype.targetProduct.watermark= +datasetfieldtype.buildInstructions.title=Build Instructions +datasetfieldtype.buildInstructions.description=Link to installation instructions/documentation +datasetfieldtype.buildInstructions.watermark=e.g. https://github.com/user/project/blob/main/BUILD.md +datasetfieldtype.softwareRequirementsItem.title=Software Requirements +datasetfieldtype.softwareRequirementsItem.description=Required software dependencies +datasetfieldtype.softwareRequirementsItem.watermark= +datasetfieldtype.softwareRequirements.title=Name & Version +datasetfieldtype.softwareRequirements.description=Name and version of the required software/library dependency +datasetfieldtype.softwareRequirements.watermark=e.g. Pandas 1.4.3 +datasetfieldtype.softwareRequirementsInfoUrl.title=Info URL +datasetfieldtype.softwareRequirementsInfoUrl.description=Link to required software/library homepage or documentation (ideally also versioned) +datasetfieldtype.softwareRequirementsInfoUrl.watermark=e.g. https://pandas.pydata.org/pandas-docs/version/1.4.3 +datasetfieldtype.softwareSuggestionsItem.title=Software Suggestions +datasetfieldtype.softwareSuggestionsItem.description=Optional dependencies, e.g. for optional features, code development, etc. +datasetfieldtype.softwareSuggestionsItem.watermark= +datasetfieldtype.softwareSuggestions.title=Name & Version +datasetfieldtype.softwareSuggestions.description=Name and version of the optional software/library dependency +datasetfieldtype.softwareSuggestions.watermark=e.g. Sphinx 5.0.2 +datasetfieldtype.softwareSuggestionsInfoUrl.title=Info URL +datasetfieldtype.softwareSuggestionsInfoUrl.description=Link to optional software/library homepage or documentation (ideally also versioned) +datasetfieldtype.softwareSuggestionsInfoUrl.watermark=e.g. https://www.sphinx-doc.org +datasetfieldtype.memoryRequirements.title=Memory Requirements +datasetfieldtype.memoryRequirements.description=Minimum memory requirements. 
+datasetfieldtype.memoryRequirements.watermark= +datasetfieldtype.processorRequirements.title=Processor Requirements +datasetfieldtype.processorRequirements.description=Processor architecture or other CPU requirements to run the application (e.g. IA64). +datasetfieldtype.processorRequirements.watermark= +datasetfieldtype.storageRequirements.title=Storage Requirements +datasetfieldtype.storageRequirements.description=Minimum storage requirements (e.g. free space required). +datasetfieldtype.storageRequirements.watermark= +datasetfieldtype.permissions.title=Permissions +datasetfieldtype.permissions.description=Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). +datasetfieldtype.permissions.watermark= +datasetfieldtype.softwareHelp.title=Software Help/Documentation +datasetfieldtype.softwareHelp.description=Link to help texts or documentation +datasetfieldtype.softwareHelp.watermark=e.g. https://user.github.io/project/docs +datasetfieldtype.readme.title=Readme +datasetfieldtype.readme.description=Link to the README of the project +datasetfieldtype.readme.watermark=e.g. https://github.com/user/project/blob/main/README.md +datasetfieldtype.releaseNotes.title=Release Notes +datasetfieldtype.releaseNotes.description=Link to release notes +datasetfieldtype.releaseNotes.watermark=e.g. https://github.com/user/project/blob/main/docs/release-0.1.md +datasetfieldtype.contIntegration.title=Continuous Integration +datasetfieldtype.contIntegration.description=Link to continuous integration service +datasetfieldtype.contIntegration.watermark=e.g. https://github.com/user/project/actions +datasetfieldtype.issueTracker.title=Issue Tracker +datasetfieldtype.issueTracker.description=Link to software bug reporting or issue tracking system +datasetfieldtype.issueTracker.watermark=e.g. 
https://github.com/user/project/issues +controlledvocabulary.developmentStatus.concept=Concept +controlledvocabulary.developmentStatus.wip=WIP +controlledvocabulary.developmentStatus.active=Active +controlledvocabulary.developmentStatus.inactive=Inactive +controlledvocabulary.developmentStatus.unsupported=Unsupported +controlledvocabulary.developmentStatus.moved=Moved +controlledvocabulary.developmentStatus.suspended=Suspended +controlledvocabulary.developmentStatus.abandoned=Abandoned diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index be02bb1b090..58592775a98 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,11 +3,36 @@ dataverse.version=${project.version} dataverse.build= +# Default only for containers! (keep mimicking the current behaviour - +# changing that is part of https://github.com/IQSS/dataverse/issues/6636) +%ct.dataverse.fqdn=localhost +%ct.dataverse.siteUrl=http://${dataverse.fqdn}:8080 + +# FILES +dataverse.files.directory=/tmp/dataverse + +# SEARCH INDEX +dataverse.solr.host=localhost +# Activating mp config profile -Dmp.config.profile=ct changes default to "solr" as DNS name +%ct.dataverse.solr.host=solr +dataverse.solr.port=8983 +dataverse.solr.protocol=http +dataverse.solr.core=collection1 +dataverse.solr.path=/solr/${dataverse.solr.core} + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 dataverse.db.user=dataverse dataverse.db.name=dataverse + +# RSERVE +dataverse.rserve.host=localhost +dataverse.rserve.port=6311 +dataverse.rserve.user=rserve +dataverse.rserve.password=rserve +dataverse.rserve.tempdir=/tmp/Rserv + # OAI SERVER dataverse.oai.server.maxidentifiers=100 dataverse.oai.server.maxrecords=10 diff --git a/src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql new 
file mode 100644 index 00000000000..6fe3f1142c2 --- /dev/null +++ b/src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql @@ -0,0 +1,5 @@ +ALTER TABLE license +ADD COLUMN IF NOT EXISTS sortorder BIGINT NOT NULL DEFAULT 0; + +CREATE INDEX IF NOT EXISTS license_sortorder_id +ON license (sortorder, id); \ No newline at end of file diff --git a/src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql b/src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql new file mode 100644 index 00000000000..5e13de057dd --- /dev/null +++ b/src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql @@ -0,0 +1 @@ +ALTER TABLE externaltool ADD COLUMN IF NOT EXISTS allowedapicalls TEXT; diff --git a/src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql b/src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql new file mode 100644 index 00000000000..91ab5253f9c --- /dev/null +++ b/src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql @@ -0,0 +1 @@ +ALTER TABLE guestbookresponse SET (autovacuum_analyze_scale_factor = 0.01); \ No newline at end of file diff --git a/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql b/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql new file mode 100644 index 00000000000..48230d21032 --- /dev/null +++ b/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql @@ -0,0 +1 @@ +ALTER TABLE externaltool ADD COLUMN IF NOT EXISTS requirements TEXT; diff --git a/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql b/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql new file mode 100644 index 00000000000..fe6d717b2a3 --- /dev/null +++ b/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql @@ -0,0 +1 @@ +ALTER TABLE harvestingclient ADD COLUMN IF NOT EXISTS customhttpheaders TEXT; diff --git 
a/src/main/resources/db/migration/V5.9.0.1__7440-configurable-license-list.sql b/src/main/resources/db/migration/V5.9.0.1__7440-configurable-license-list.sql index cb76b2270a4..a8f7f41e2ef 100644 --- a/src/main/resources/db/migration/V5.9.0.1__7440-configurable-license-list.sql +++ b/src/main/resources/db/migration/V5.9.0.1__7440-configurable-license-list.sql @@ -2,7 +2,6 @@ ALTER TABLE termsofuseandaccess ADD COLUMN IF NOT EXISTS license_id BIGINT; DO $$ BEGIN - BEGIN ALTER TABLE termsofuseandaccess ADD CONSTRAINT fk_termsofuseandcesss_license_id foreign key (license_id) REFERENCES license(id); EXCEPTION diff --git a/src/main/webapp/WEB-INF/glassfish-web.xml b/src/main/webapp/WEB-INF/glassfish-web.xml index ecd3ba15c40..e56d7013abf 100644 --- a/src/main/webapp/WEB-INF/glassfish-web.xml +++ b/src/main/webapp/WEB-INF/glassfish-web.xml @@ -8,9 +8,15 @@ Keep a copy of the generated servlet class' java code. + + - + + diff --git a/src/main/webapp/dashboard.xhtml b/src/main/webapp/dashboard.xhtml index c5b6a507a92..5a72b52937b 100644 --- a/src/main/webapp/dashboard.xhtml +++ b/src/main/webapp/dashboard.xhtml @@ -42,7 +42,7 @@ #{dashboardPage.numberOfHarvestedDatasets}

- +

diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index 1cbf297bf89..86e52092622 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -25,6 +25,7 @@ +
@@ -236,11 +237,12 @@
-
+ or !empty termsOfUseAndAccess.studyCompletion}">
  diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 1bb862721a5..6b91f815d9a 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -846,6 +846,7 @@ + @@ -1789,6 +1790,7 @@ +