From a79015f6785fbd572e01033584dfafe25fb153bc Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Wed, 20 Nov 2024 10:09:41 -0500 Subject: [PATCH 01/20] #11037 limit beta testing deployments to one concurrent action at a time --- .github/workflows/deploy_beta_testing.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/deploy_beta_testing.yml b/.github/workflows/deploy_beta_testing.yml index 4cec08564a4..eca8416732a 100644 --- a/.github/workflows/deploy_beta_testing.yml +++ b/.github/workflows/deploy_beta_testing.yml @@ -5,6 +5,10 @@ on: branches: - develop +concurrency: + group: deploy-beta-testing + cancel-in-progress: false + jobs: build: runs-on: ubuntu-latest From 4449679d2af3614de54a077aaaae4ea2551ed22b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 21 Oct 2024 17:14:19 -0400 Subject: [PATCH 02/20] quick draft implementation of addressing issue 1. from #10909. --- .../api/imports/ImportGenericServiceBean.java | 41 +++++++++++++++---- .../api/imports/ImportServiceBean.java | 13 +++++- .../harvest/client/HarvestingClient.java | 23 ++++++++++- src/main/resources/db/migration/V6.4.0.1.sql | 2 +- 4 files changed, 65 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index 41a57665010..bf8d068a69c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -150,12 +150,16 @@ public DatasetDTO processXML( XMLStreamReader xmlr, ForeignMetadataFormatMapping } - // Helper method for importing harvested Dublin Core xml. + // Helper methods for importing harvested Dublin Core xml. // Dublin Core is considered a mandatory, built in metadata format mapping. // It is distributed as required content, in reference_data.sql. // Note that arbitrary formatting tags are supported for the outer xml // wrapper. -- L.A. 4.5 public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException { + return processOAIDCxml(DcXmlToParse, null); + } + + public DatasetDTO processOAIDCxml(String DcXmlToParse, String oaiIdentifier) throws XMLStreamException { // look up DC metadata mapping: ForeignMetadataFormatMapping dublinCoreMapping = findFormatMappingByName(DCTERMS); @@ -185,18 +189,37 @@ public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException datasetDTO.getDatasetVersion().setVersionState(DatasetVersion.VersionState.RELEASED); - // Our DC import handles the contents of the dc:identifier field - // as an "other id". In the context of OAI harvesting, we expect - // the identifier to be a global id, so we need to rearrange that: + // In some cases, the identifier that we want to use for the dataset is + // already supplied to the method explicitly. For example, in some + // harvesting cases we'll want to use the OAI identifier (the identifier + // from the
section of the OAI record) for that purpose, without + // expecting to find a valid persistent id in the body of the DC record: - String identifier = getOtherIdFromDTO(datasetDTO.getDatasetVersion()); - logger.fine("Imported identifier: "+identifier); + String globalIdentifier; - String globalIdentifier = reassignIdentifierAsGlobalId(identifier, datasetDTO); - logger.fine("Detected global identifier: "+globalIdentifier); + if (oaiIdentifier != null) { + logger.fine("Attempting to use " + oaiIdentifier + " as the persistentId of the imported dataset"); + + globalIdentifier = reassignIdentifierAsGlobalId(oaiIdentifier, datasetDTO); + } else { + // Our DC import handles the contents of the dc:identifier field + // as an "other id". Unless we are using an externally supplied + // global id, we will be using the first such "other id" that we + // can parse and recognize as the global id for the imported dataset + // (note that this is the default behavior during harvesting), + // so we need to reaassign it accordingly: + String identifier = getOtherIdFromDTO(datasetDTO.getDatasetVersion()); + logger.fine("Imported identifier: " + identifier); + + globalIdentifier = reassignIdentifierAsGlobalId(identifier, datasetDTO); + logger.fine("Detected global identifier: " + globalIdentifier); + } if (globalIdentifier == null) { - throw new EJBException("Failed to find a global identifier in the OAI_DC XML record."); + String exceptionMsg = oaiIdentifier == null ? + "Failed to find a global identifier in the OAI_DC XML record." : + "Failed to parse the supplied identifier as a valid Persistent Id"; + throw new EJBException(exceptionMsg); } return datasetDTO; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index ee4609a7c56..d0a0629e1ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -208,7 +208,13 @@ public JsonObjectBuilder handleFile(DataverseRequest dataverseRequest, Dataverse } @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String metadataFormat, File metadataFile, Date oaiDateStamp, PrintWriter cleanupLog) throws ImportException, IOException { + public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, + HarvestingClient harvestingClient, + String harvestIdentifier, + String metadataFormat, + File metadataFile, + Date oaiDateStamp, + PrintWriter cleanupLog) throws ImportException, IOException { if (harvestingClient == null || harvestingClient.getDataverse() == null) { throw new ImportException("importHarvestedDataset called with a null harvestingClient, or an invalid harvestingClient."); } @@ -245,7 +251,10 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve logger.fine("importing DC "+metadataFile.getAbsolutePath()); try { String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath())); - dsDTO = importGenericService.processOAIDCxml(xmlToParse); + String suggestedIdentifier = harvestingClient.isUseOaiIdentifiersAsPids() + ? harvestIdentifier + : null; + dsDTO = importGenericService.processOAIDCxml(xmlToParse, suggestedIdentifier); } catch (IOException | XMLStreamException e) { throw new ImportException("Failed to process Dublin Core XML record: "+ e.getClass() + " (" + e.getMessage() + ")"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index 0667f5594ce..ec26729b685 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -214,6 +214,7 @@ public void setArchiveDescription(String archiveDescription) { this.archiveDescription = archiveDescription; } + @Column(columnDefinition="TEXT") private String harvestingSet; public String getHarvestingSet() { @@ -252,8 +253,26 @@ public void setAllowHarvestingMissingCVV(boolean allowHarvestingMissingCVV) { this.allowHarvestingMissingCVV = allowHarvestingMissingCVV; } - // TODO: do we need "orphanRemoval=true"? -- L.A. 4.4 - // TODO: should it be @OrderBy("startTime")? -- L.A. 4.4 + private Boolean useListRecords; + + public Boolean isUseListRecords() { + return useListRecords; + } + + public void setUseListrecords(boolean useListRecords) { + this.useListRecords = useListRecords; + } + + private Boolean useOaiIdAsPid; + + public Boolean isUseOaiIdentifiersAsPids() { + return useOaiIdAsPid; + } + + public void setUseOaiIdentifiersAsPids(boolean useOaiIdAsPid) { + this.useOaiIdAsPid = useOaiIdAsPid; + } + @OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) @OrderBy("id") private List harvestHistory; diff --git a/src/main/resources/db/migration/V6.4.0.1.sql b/src/main/resources/db/migration/V6.4.0.1.sql index 0bcd87dd736..438c52a192e 100644 --- a/src/main/resources/db/migration/V6.4.0.1.sql +++ b/src/main/resources/db/migration/V6.4.0.1.sql @@ -1,4 +1,4 @@ -- Adding a case-insensitive index related to #11003 -- -CREATE UNIQUE INDEX IF NOT EXISTS INDEX_DVOBJECT_authority_protocol_upper_identifier ON dvobject (authority, protocol, UPPER(identifier)); \ No newline at end of file +CREATE UNIQUE INDEX IF NOT EXISTS INDEX_DVOBJECT_authority_protocol_upper_identifier ON dvobject (authority, protocol, UPPER(identifier)); From 2656ccdcc0c7f59eebfcf52e82c011c829b7dda7 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 29 Oct 2024 10:27:09 -0400 Subject: [PATCH 03/20] Adding the new client options to the json printer and parser #10909 --- .../java/edu/harvard/iq/dataverse/util/json/JsonParser.java | 2 ++ .../java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 3f60317655a..8bb8fd93dd1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -1052,6 +1052,8 @@ public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingC harvestingClient.setHarvestingSet(obj.getString("set",null)); harvestingClient.setCustomHttpHeaders(obj.getString("customHeaders", null)); harvestingClient.setAllowHarvestingMissingCVV(obj.getBoolean("allowHarvestingMissingCVV", false)); + harvestingClient.setUseListrecords(obj.getBoolean("useListRecords", false)); + harvestingClient.setUseOaiIdentifiersAsPids(obj.getBoolean("useOaiIdentifiersAsPids", false)); return dataverseAlias; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index f884d313d64..6666a7f0e7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -1013,6 +1013,8 @@ public static JsonObjectBuilder json(HarvestingClient harvestingClient) { add("status", harvestingClient.isHarvestingNow() ? "inProgress" : "inActive"). add("customHeaders", harvestingClient.getCustomHttpHeaders()). add("allowHarvestingMissingCVV", harvestingClient.getAllowHarvestingMissingCVV()). + add("useListRecords", harvestingClient.isUseListRecords()). + add("useOaiIdentifiersAsPids", harvestingClient.isUseOaiIdentifiersAsPids()). add("lastHarvest", harvestingClient.getLastHarvestTime() == null ? null : harvestingClient.getLastHarvestTime().toString()). add("lastResult", harvestingClient.getLastResult()). add("lastSuccessful", harvestingClient.getLastSuccessfulHarvestTime() == null ? null : harvestingClient.getLastSuccessfulHarvestTime().toString()). From 5c043cdb642bc9fa17cc0620292b11004813ed56 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 1 Nov 2024 10:26:39 -0400 Subject: [PATCH 04/20] we DO want to include the persistent id in the search cards for all harvested datasets. #10909. (that whole block of extra checks on the harvest "style" may be redundant by now - I'll think about it) --- src/main/java/edu/harvard/iq/dataverse/DataCitation.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index 3977023fc4b..02fb59751fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -792,6 +792,7 @@ private GlobalId getPIDFrom(DatasetVersion dsv, DvObject dv) { if (!dsv.getDataset().isHarvested() || HarvestingClient.HARVEST_STYLE_VDC.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle()) || HarvestingClient.HARVEST_STYLE_ICPSR.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle()) + || HarvestingClient.HARVEST_STYLE_DEFAULT.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle()) || HarvestingClient.HARVEST_STYLE_DATAVERSE .equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle())) { if(!isDirect()) { From b7efee0cfbd45104c5432b69a2732def5889b868 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 22 Nov 2024 19:50:21 -0500 Subject: [PATCH 05/20] a flyway script for the "use the oai id as the pid" harvesting client flag. --- src/main/resources/db/migration/V6.4.0.3.sql | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/main/resources/db/migration/V6.4.0.3.sql diff --git a/src/main/resources/db/migration/V6.4.0.3.sql b/src/main/resources/db/migration/V6.4.0.3.sql new file mode 100644 index 00000000000..307d8ed206c --- /dev/null +++ b/src/main/resources/db/migration/V6.4.0.3.sql @@ -0,0 +1,2 @@ +-- Add this boolean flag to accommodate a new harvesting client feature +ALTER TABLE harvestingclient ADD COLUMN IF NOT EXISTS useOaiIdAsPid BOOLEAN DEFAULT FALSE; From eca03896f3d0a1e36505ea958e7dc77c118d62c5 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 22 Nov 2024 19:53:25 -0500 Subject: [PATCH 06/20] removed the part of the cherry-picked commit that I'm not going to need in this branch. --- .../dataverse/harvest/client/HarvestingClient.java | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index ec26729b685..de9cc7c0db6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -253,19 +253,9 @@ public void setAllowHarvestingMissingCVV(boolean allowHarvestingMissingCVV) { this.allowHarvestingMissingCVV = allowHarvestingMissingCVV; } - private Boolean useListRecords; + private boolean useOaiIdAsPid; - public Boolean isUseListRecords() { - return useListRecords; - } - - public void setUseListrecords(boolean useListRecords) { - this.useListRecords = useListRecords; - } - - private Boolean useOaiIdAsPid; - - public Boolean isUseOaiIdentifiersAsPids() { + public boolean isUseOaiIdentifiersAsPids() { return useOaiIdAsPid; } From 29114175c316dda01e0e240d63951cb89a4f9fdd Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 22 Nov 2024 19:55:40 -0500 Subject: [PATCH 07/20] removed pieces of another cherry-picked commit not needed in this branch. --- src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java | 1 - .../java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java | 1 - 2 files changed, 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 8bb8fd93dd1..232b7431a24 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -1052,7 +1052,6 @@ public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingC harvestingClient.setHarvestingSet(obj.getString("set",null)); harvestingClient.setCustomHttpHeaders(obj.getString("customHeaders", null)); harvestingClient.setAllowHarvestingMissingCVV(obj.getBoolean("allowHarvestingMissingCVV", false)); - harvestingClient.setUseListrecords(obj.getBoolean("useListRecords", false)); harvestingClient.setUseOaiIdentifiersAsPids(obj.getBoolean("useOaiIdentifiersAsPids", false)); return dataverseAlias; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 6666a7f0e7d..91af13c79a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -1013,7 +1013,6 @@ public static JsonObjectBuilder json(HarvestingClient harvestingClient) { add("status", harvestingClient.isHarvestingNow() ? "inProgress" : "inActive"). add("customHeaders", harvestingClient.getCustomHttpHeaders()). add("allowHarvestingMissingCVV", harvestingClient.getAllowHarvestingMissingCVV()). - add("useListRecords", harvestingClient.isUseListRecords()). add("useOaiIdentifiersAsPids", harvestingClient.isUseOaiIdentifiersAsPids()). add("lastHarvest", harvestingClient.getLastHarvestTime() == null ? null : harvestingClient.getLastHarvestTime().toString()). add("lastResult", harvestingClient.getLastResult()). From 0967b7a8363381d8e6f5c52a230215f87cf4f671 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 22 Nov 2024 20:20:35 -0500 Subject: [PATCH 08/20] A "hybrid" implementation of the support for using OAI identifiers for the pid of the imported datasets - merging 2 different approaches implemented in the PRs --- .../api/imports/ImportGenericServiceBean.java | 26 ++++++++++++++++--- .../api/imports/ImportServiceBean.java | 7 ++--- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index bf8d068a69c..7bce0947a0e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -156,10 +156,10 @@ public DatasetDTO processXML( XMLStreamReader xmlr, ForeignMetadataFormatMapping // Note that arbitrary formatting tags are supported for the outer xml // wrapper. -- L.A. 4.5 public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException { - return processOAIDCxml(DcXmlToParse, null); + return processOAIDCxml(DcXmlToParse, null, false); } - public DatasetDTO processOAIDCxml(String DcXmlToParse, String oaiIdentifier) throws XMLStreamException { + public DatasetDTO processOAIDCxml(String DcXmlToParse, String oaiIdentifier, boolean preferSuppliedIdentifier) throws XMLStreamException { // look up DC metadata mapping: ForeignMetadataFormatMapping dublinCoreMapping = findFormatMappingByName(DCTERMS); @@ -208,7 +208,7 @@ public DatasetDTO processOAIDCxml(String DcXmlToParse, String oaiIdentifier) thr // can parse and recognize as the global id for the imported dataset // (note that this is the default behavior during harvesting), // so we need to reaassign it accordingly: - String identifier = getOtherIdFromDTO(datasetDTO.getDatasetVersion()); + String identifier = selectIdentifier(datasetDTO.getDatasetVersion(), oaiIdentifier, preferSuppliedIdentifier); logger.fine("Imported identifier: " + identifier); globalIdentifier = reassignIdentifierAsGlobalId(identifier, datasetDTO); @@ -367,8 +367,16 @@ private FieldDTO makeDTO(DatasetFieldType dataverseFieldType, FieldDTO value, St return value; } - private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { + private String selectIdentifier(DatasetVersionDTO datasetVersionDTO, String suppliedIdentifier, boolean preferSuppliedIdentifier) { List otherIds = new ArrayList<>(); + + if (suppliedIdentifier != null && preferSuppliedIdentifier) { + // This supplied identifier (in practice, his is likely the OAI-PMH + // identifier from the
section) will be our first + // choice candidate for the pid of the imported dataset: + otherIds.add(suppliedIdentifier); + } + for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); @@ -386,6 +394,16 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { } } } + + if (suppliedIdentifier != null && !preferSuppliedIdentifier) { + // Unless specifically instructed to prefer this extra identifier + // (in practice, this is likely the OAI-PMH identifier from the + //
section), we will try to use it as the *last* + // possible candidate for the pid, so, adding it to the end of the + // list: + otherIds.add(suppliedIdentifier); + } + if (!otherIds.isEmpty()) { // We prefer doi or hdl identifiers like "doi:10.7910/DVN/1HE30F" for (String otherId : otherIds) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index d0a0629e1ae..7dc2aed799e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -250,11 +250,8 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, } else if ("dc".equalsIgnoreCase(metadataFormat) || "oai_dc".equals(metadataFormat)) { logger.fine("importing DC "+metadataFile.getAbsolutePath()); try { - String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath())); - String suggestedIdentifier = harvestingClient.isUseOaiIdentifiersAsPids() - ? harvestIdentifier - : null; - dsDTO = importGenericService.processOAIDCxml(xmlToParse, suggestedIdentifier); + String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath())); + dsDTO = importGenericService.processOAIDCxml(xmlToParse, harvestIdentifier, harvestingClient.isUseOaiIdentifiersAsPids()); } catch (IOException | XMLStreamException e) { throw new ImportException("Failed to process Dublin Core XML record: "+ e.getClass() + " (" + e.getMessage() + ")"); } From 00943e1e2a6ce9825dbc29d898431664cad07498 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 24 Nov 2024 20:35:27 -0500 Subject: [PATCH 09/20] guide entry --- doc/sphinx-guides/source/api/native-api.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 99a8a9f7cf4..1f36691be0d 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5246,6 +5246,7 @@ Shows a Harvesting Client with a defined nickname:: "dataverseAlias": "fooData", "nickName": "myClient", "set": "fooSet", + "useOaiIdentifiersAsPids": false "schedule": "none", "status": "inActive", "lastHarvest": "Thu Oct 13 14:48:57 EDT 2022", @@ -5280,6 +5281,7 @@ The following optional fields are supported: - style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). - customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. - allowHarvestingMissingCVV: Flag to allow datasets to be harvested with Controlled Vocabulary Values that existed in the originating Dataverse Project but are not in the harvesting Dataverse Project. (Default is false). Currently only settable using API. +- useOaiIdentifiersAsPids: Defaults to false; if set to true, Harvester will attempt to use the identifier from the OAI-PMH record header as the **first choice** for the persistent id of the harvested dataset. When set to false, Dataverse will still attempt to use this identifier, but only if none of the `` entries in the OAI_DC record contain a valid persistent id (this is new as of v6.5). Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API. From cc7fb45d43f8b842d079d0a688fc2487bd0f56f8 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 24 Nov 2024 20:54:32 -0500 Subject: [PATCH 10/20] release note. --- doc/release-notes/11049-oai-identifiers-as-pids.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 doc/release-notes/11049-oai-identifiers-as-pids.md diff --git a/doc/release-notes/11049-oai-identifiers-as-pids.md b/doc/release-notes/11049-oai-identifiers-as-pids.md new file mode 100644 index 00000000000..2f857bf2198 --- /dev/null +++ b/doc/release-notes/11049-oai-identifiers-as-pids.md @@ -0,0 +1,5 @@ +## When harvesting, Dataverse can now use the identifier from the OAI-PMH record header as the persistent id for the harvested dataset. + +This will allow harvesting from sources that do not include a persistent id in their oai_dc metadata records, but use valid dois or handles as the OAI-PMH record header identifiers. + +It is also possible to optionally configure a harvesting client to use this OAI-PMH identifier as the **preferred** choice for the persistent id. See the [Harvesting Clients API](https://guides.dataverse.org/en/6.5/api/native-api.html#create-a-harvesting-client) section of the Guides for more information. \ No newline at end of file From d6fc24022d4cd0cd6759064b7dd01e6885ad7c12 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 24 Nov 2024 21:12:58 -0500 Subject: [PATCH 11/20] json files for the new tests (from PR #11010 by @stevenferey) --- .../json/importGenericWithOtherId.json | 307 ++++++++++++++++++ .../json/importGenericWithoutOtherId.json | 258 +++++++++++++++ 2 files changed, 565 insertions(+) create mode 100644 src/test/resources/json/importGenericWithOtherId.json create mode 100644 src/test/resources/json/importGenericWithoutOtherId.json diff --git a/src/test/resources/json/importGenericWithOtherId.json b/src/test/resources/json/importGenericWithOtherId.json new file mode 100644 index 00000000000..af9241393e9 --- /dev/null +++ b/src/test/resources/json/importGenericWithOtherId.json @@ -0,0 +1,307 @@ +{ + "UNF": "UNF", + "createTime": "2014-11-12 12:17:55 -05", + "distributionDate": "Distribution Date", + "id": 2, + "lastUpdateTime": "2014-11-12 12:20:32 -05", + "metadataBlocks": { + "astrophysics": { + "displayName": "Astronomy and Astrophysics Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "astroType", + "value": [ + "Image", + "Mosaic", + "EventList" + ] + } + ] + }, + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "multiple": false, + "typeClass": "primitive", + "typeName": "title", + "value": "My Dataset" + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "author", + "value": [ + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Top" + }, + "authorIdentifier": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "ellenid" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "ORCID" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Privileged, Pete" + } + }, + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Bottom" + }, + "authorIdentifier": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "audreyId" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "DAISY" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Awesome, Audrey" + } + } + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "datasetContact", + "value": [ + "pete@malinator.com" + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescription", + "value": "Here is my description" + }, + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "subject", + "value": [ + "Arts and Humanities", + "Astronomy and Astrophysics", + "Business and Management" + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "keyword", + "value": [ + "keyword1", + "keyword2" + ] + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "otherId", + "value": [ + { + "otherIdAgency": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdAgency", + "value": "my agency" + }, + "otherIdValue": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdValue", + "value": "otherId" + } + }, + { + "otherIdAgency": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdAgency", + "value": "another agency" + }, + "otherIdValue": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdValue", + "value": "otherId2" + } + }, + { + "otherIdAgency": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdAgency", + "value": "another agency" + }, + "otherIdValue": { + "multiple": false, + "typeClass": "primitive", + "typeName": "otherIdValue", + "value": "doi:10.7910/DVN/TJCLKP" + } + } + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "depositor", + "value": "Ellen K" + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dateOfDeposit", + "value": "2014-11-12" + } + ] + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "geographicCoverage", + "value": [ + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": "city", + "value": "Arlington" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "United States" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "MA" + } + }, + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": "city", + "value": "beachcity" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "Aruba" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "beach" + } + } + ] + }, + { + "multiple": false, + "typeClass": "compound", + "typeName": "geographicBoundingBox", + "value": + { + "eastLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "eastLongitude", + "value": "23" + }, + "northLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "northLatitude", + "value": "786" + }, + "southLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "southLatitude", + "value": "34" + }, + "westLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "westLongitude", + "value": "45" + } + } + + } + ] + }, + "socialscience": { + "displayName": "Social Science and Humanities Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "software", + "value": [ + { + "softwareName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareName", + "value": "softwareName" + }, + "softwareVersion": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareVersion", + "value": "software version" + } + } + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "unitOfAnalysis", + "value": "unit of analysis" + } + ] + } + }, + "productionDate": "Production Date", + "versionState": "DRAFT" + } diff --git a/src/test/resources/json/importGenericWithoutOtherId.json b/src/test/resources/json/importGenericWithoutOtherId.json new file mode 100644 index 00000000000..ceb2263c2cf --- /dev/null +++ b/src/test/resources/json/importGenericWithoutOtherId.json @@ -0,0 +1,258 @@ +{ + "UNF": "UNF", + "createTime": "2014-11-12 12:17:55 -05", + "distributionDate": "Distribution Date", + "id": 2, + "lastUpdateTime": "2014-11-12 12:20:32 -05", + "metadataBlocks": { + "astrophysics": { + "displayName": "Astronomy and Astrophysics Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "astroType", + "value": [ + "Image", + "Mosaic", + "EventList" + ] + } + ] + }, + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "multiple": false, + "typeClass": "primitive", + "typeName": "title", + "value": "My Dataset" + }, + { + "multiple": true, + "typeClass": "compound", + "typeName": "author", + "value": [ + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Top" + }, + "authorIdentifier": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "ellenid" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "ORCID" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Privileged, Pete" + } + }, + { + "authorAffiliation": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorAffiliation", + "value": "Bottom" + }, + "authorIdentifier": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorIdentifier", + "value": "audreyId" + }, + "authorIdentifierScheme": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "authorIdentifierScheme", + "value": "DAISY" + }, + "authorName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "authorName", + "value": "Awesome, Audrey" + } + } + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "datasetContact", + "value": [ + "pete@malinator.com" + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescription", + "value": "Here is my description" + }, + { + "multiple": true, + "typeClass": "controlledVocabulary", + "typeName": "subject", + "value": [ + "Arts and Humanities", + "Astronomy and Astrophysics", + "Business and Management" + ] + }, + { + "multiple": true, + "typeClass": "primitive", + "typeName": "keyword", + "value": [ + "keyword1", + "keyword2" + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "depositor", + "value": "Ellen K" + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "dateOfDeposit", + "value": "2014-11-12" + } + ] + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "geographicCoverage", + "value": [ + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": "city", + "value": "Arlington" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "United States" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "MA" + } + }, + { + "city": { + "multiple": false, + "typeClass": "primitive", + "typeName": "city", + "value": "beachcity" + }, + "country": { + "multiple": false, + "typeClass": "controlledVocabulary", + "typeName": "country", + "value": "Aruba" + }, + "state": { + "multiple": false, + "typeClass": "primitive", + "typeName": "state", + "value": "beach" + } + } + ] + }, + { + "multiple": false, + "typeClass": "compound", + "typeName": "geographicBoundingBox", + "value": + { + "eastLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "eastLongitude", + "value": "23" + }, + "northLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "northLatitude", + "value": "786" + }, + "southLatitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "southLatitude", + "value": "34" + }, + "westLongitude": { + "multiple": false, + "typeClass": "primitive", + "typeName": "westLongitude", + "value": "45" + } + } + + } + ] + }, + "socialscience": { + "displayName": "Social Science and Humanities Metadata", + "fields": [ + { + "multiple": true, + "typeClass": "compound", + "typeName": "software", + "value": [ + { + "softwareName": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareName", + "value": "softwareName" + }, + "softwareVersion": { + "multiple": false, + "typeClass": "primitive", + "typeName": "softwareVersion", + "value": "software version" + } + } + ] + }, + { + "multiple": false, + "typeClass": "primitive", + "typeName": "unitOfAnalysis", + "value": "unit of analysis" + } + ] + } + }, + "productionDate": "Production Date", + "versionState": "DRAFT" + } From 86b226008e3ba53b0a0aefb0fd0fe9b15087c3f3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 24 Nov 2024 21:38:03 -0500 Subject: [PATCH 12/20] tests for selecting persistent ids in the GenericImportService (from PR #11010 by @stevenferey) --- .../api/imports/ImportGenericServiceBean.java | 4 ++ .../imports/ImportGenericServiceBeanTest.java | 53 ++++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index 7bce0947a0e..aa5b25e3967 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -367,6 +367,10 @@ private FieldDTO makeDTO(DatasetFieldType dataverseFieldType, FieldDTO value, St return value; } + public String selectIdentifier(DatasetVersionDTO datasetVersionDTO, String suppliedIdentifier) { + return selectIdentifier(datasetVersionDTO, suppliedIdentifier, false); + } + private String selectIdentifier(DatasetVersionDTO datasetVersionDTO, String suppliedIdentifier, boolean preferSuppliedIdentifier) { List otherIds = new ArrayList<>(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java index 44739f3f62a..acf5d970358 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBeanTest.java @@ -1,6 +1,13 @@ package edu.harvard.iq.dataverse.api.imports; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; +import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; + +import org.apache.commons.io.FileUtils; +import com.google.gson.Gson; +import java.io.File; +import java.io.IOException; + import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.InjectMocks; @@ -8,6 +15,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; +import java.nio.charset.StandardCharsets; + @ExtendWith(MockitoExtension.class) public class ImportGenericServiceBeanTest { @@ -15,7 +24,47 @@ public class ImportGenericServiceBeanTest { private ImportGenericServiceBean importGenericService; @Test - public void testReassignIdentifierAsGlobalId() { + void testIdentifierHarvestableWithOtherID() throws IOException { + // "otherIdValue" containing the value : doi:10.7910/DVN/TJCLKP + File file = new File("src/test/resources/json/importGenericWithOtherId.json"); + String text = FileUtils.readFileToString(file, StandardCharsets.UTF_8); + DatasetVersionDTO dto = new Gson().fromJson(text, DatasetVersionDTO.class); + + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "https://doi.org/10.7910/DVN/TJCLKP")); + // junk or null + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "junk")); + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, null)); + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "http://www.example.com")); + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "https://dataverse.org")); + } + + @Test + void testIdentifierHarvestableWithoutOtherID() throws IOException { + // Does not contain data of type "otherIdValue" + File file = new File("src/test/resources/json/importGenericWithoutOtherId.json"); + String text = FileUtils.readFileToString(file, StandardCharsets.UTF_8); + DatasetVersionDTO dto = new Gson().fromJson(text, DatasetVersionDTO.class); + + // non-URL + assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "doi:10.7910/DVN/TJCLKP")); + assertEquals("hdl:10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "hdl:10.7910/DVN/TJCLKP")); + // HTTPS + assertEquals("https://doi.org/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "https://doi.org/10.7910/DVN/TJCLKP")); + assertEquals("https://dx.doi.org/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "https://dx.doi.org/10.7910/DVN/TJCLKP")); + assertEquals("https://hdl.handle.net/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "https://hdl.handle.net/10.7910/DVN/TJCLKP")); + // HTTP (no S) + assertEquals("http://doi.org/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "http://doi.org/10.7910/DVN/TJCLKP")); + assertEquals("http://dx.doi.org/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "http://dx.doi.org/10.7910/DVN/TJCLKP")); + assertEquals("http://hdl.handle.net/10.7910/DVN/TJCLKP", importGenericService.selectIdentifier(dto, "http://hdl.handle.net/10.7910/DVN/TJCLKP")); + // junk or null + assertNull(importGenericService.selectIdentifier(dto, "junk")); + assertNull(importGenericService.selectIdentifier(dto, null)); + assertNull(importGenericService.selectIdentifier(dto, "http://www.example.com")); + assertNull(importGenericService.selectIdentifier(dto, "https://dataverse.org")); + } + + @Test + void testReassignIdentifierAsGlobalId() { // non-URL assertEquals("doi:10.7910/DVN/TJCLKP", importGenericService.reassignIdentifierAsGlobalId("doi:10.7910/DVN/TJCLKP", new DatasetDTO())); assertEquals("hdl:10.7910/DVN/TJCLKP", importGenericService.reassignIdentifierAsGlobalId("hdl:10.7910/DVN/TJCLKP", new DatasetDTO())); @@ -29,6 +78,8 @@ public void testReassignIdentifierAsGlobalId() { assertEquals("hdl:10.7910/DVN/TJCLKP", importGenericService.reassignIdentifierAsGlobalId("http://hdl.handle.net/10.7910/DVN/TJCLKP", new DatasetDTO())); // junk assertNull(importGenericService.reassignIdentifierAsGlobalId("junk", new DatasetDTO())); + assertNull(importGenericService.reassignIdentifierAsGlobalId("http://www.example.com", new DatasetDTO())); + assertNull(importGenericService.reassignIdentifierAsGlobalId("https://dataverse.org", new DatasetDTO())); } } From 115c88eb37d6288d9af1d9a8a2abacd230407700 Mon Sep 17 00:00:00 2001 From: landreev Date: Mon, 25 Nov 2024 11:07:47 -0500 Subject: [PATCH 13/20] Update doc/release-notes/11049-oai-identifiers-as-pids.md Co-authored-by: Philip Durbin --- doc/release-notes/11049-oai-identifiers-as-pids.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/11049-oai-identifiers-as-pids.md b/doc/release-notes/11049-oai-identifiers-as-pids.md index 2f857bf2198..8b53a461a70 100644 --- a/doc/release-notes/11049-oai-identifiers-as-pids.md +++ b/doc/release-notes/11049-oai-identifiers-as-pids.md @@ -2,4 +2,4 @@ This will allow harvesting from sources that do not include a persistent id in their oai_dc metadata records, but use valid dois or handles as the OAI-PMH record header identifiers. -It is also possible to optionally configure a harvesting client to use this OAI-PMH identifier as the **preferred** choice for the persistent id. See the [Harvesting Clients API](https://guides.dataverse.org/en/6.5/api/native-api.html#create-a-harvesting-client) section of the Guides for more information. \ No newline at end of file +It is also possible to optionally configure a harvesting client to use this OAI-PMH identifier as the **preferred** choice for the persistent id. See the [Harvesting Clients API](https://guides.dataverse.org/en/6.5/api/native-api.html#create-a-harvesting-client) section of the Guides, #11049 and #10982 for more information. \ No newline at end of file From a295cc4a34b4836b087fd662e932838ddb278a87 Mon Sep 17 00:00:00 2001 From: landreev Date: Mon, 25 Nov 2024 11:08:07 -0500 Subject: [PATCH 14/20] Update doc/sphinx-guides/source/api/native-api.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/native-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 1f36691be0d..641e443e54a 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5281,7 +5281,7 @@ The following optional fields are supported: - style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). - customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. - allowHarvestingMissingCVV: Flag to allow datasets to be harvested with Controlled Vocabulary Values that existed in the originating Dataverse Project but are not in the harvesting Dataverse Project. (Default is false). Currently only settable using API. -- useOaiIdentifiersAsPids: Defaults to false; if set to true, Harvester will attempt to use the identifier from the OAI-PMH record header as the **first choice** for the persistent id of the harvested dataset. When set to false, Dataverse will still attempt to use this identifier, but only if none of the `` entries in the OAI_DC record contain a valid persistent id (this is new as of v6.5). +- useOaiIdentifiersAsPids: Defaults to false; if set to true, the harvester will attempt to use the identifier from the OAI-PMH record header as the **first choice** for the persistent id of the harvested dataset. When set to false, Dataverse will still attempt to use this identifier, but only if none of the `` entries in the OAI_DC record contain a valid persistent id (this is new as of v6.5). Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API. From 3c4628786f18c235707afc6a282969da11ed4c9d Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 25 Nov 2024 11:22:01 -0500 Subject: [PATCH 15/20] reverted the flyway script back to its original state (a newline was added when resolving a conflict with a cherry-picked commit, which of course changes the checksum) --- src/main/resources/db/migration/V6.4.0.1.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/db/migration/V6.4.0.1.sql b/src/main/resources/db/migration/V6.4.0.1.sql index 438c52a192e..0bcd87dd736 100644 --- a/src/main/resources/db/migration/V6.4.0.1.sql +++ b/src/main/resources/db/migration/V6.4.0.1.sql @@ -1,4 +1,4 @@ -- Adding a case-insensitive index related to #11003 -- -CREATE UNIQUE INDEX IF NOT EXISTS INDEX_DVOBJECT_authority_protocol_upper_identifier ON dvobject (authority, protocol, UPPER(identifier)); +CREATE UNIQUE INDEX IF NOT EXISTS INDEX_DVOBJECT_authority_protocol_upper_identifier ON dvobject (authority, protocol, UPPER(identifier)); \ No newline at end of file From 8a361be347e639aeedc68b6f9bc93c18e2f6eaaf Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 25 Nov 2024 11:26:37 -0500 Subject: [PATCH 16/20] another cherry-picked commit not needed in this branch. --- .../harvard/iq/dataverse/harvest/client/HarvestingClient.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index de9cc7c0db6..7280b6af129 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -214,7 +214,6 @@ public void setArchiveDescription(String archiveDescription) { this.archiveDescription = archiveDescription; } - @Column(columnDefinition="TEXT") private String harvestingSet; public String getHarvestingSet() { From 321de7c46b34a53bc9df64b9947fab47f9f126a6 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 26 Nov 2024 10:35:59 -0500 Subject: [PATCH 17/20] there's no need to slap the "incomplete metadata" label on harvested dataset cards #10909 --- src/main/webapp/search-include-fragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index 505fe681363..fcc5aff6f92 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -582,7 +582,7 @@ - +
From 5e3616e03d382dc20b0722c85a5d51b1afaf9779 Mon Sep 17 00:00:00 2001 From: Julian Gautier Date: Wed, 27 Nov 2024 11:22:33 -0500 Subject: [PATCH 19/20] Update appendix.rst I removed the broken links behind the names of the metadata blocks that are listed in the Supported Metadata section. And I remove "version" from the "(see .tsv version)" text that follows the name of each of the metadata blocks. --- doc/sphinx-guides/source/user/appendix.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index f7843b8bf40..601274a50bd 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -22,14 +22,14 @@ Supported Metadata Detailed below are what metadata schemas we support for Citation and Domain Specific Metadata in the Dataverse Project: -- `Citation Metadata `__ (`see .tsv version `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.0 `__, and Dublin Core's `DCMI Metadata Terms `__ . Language field uses `ISO 639-1 `__ controlled vocabulary. -- `Geospatial Metadata `__ (`see .tsv version `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.0 `__, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. -- `Social Science & Humanities Metadata `__ (`see .tsv version `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, and Dublin Core. -- `Astronomy and Astrophysics Metadata `__ (`see .tsv version `__): These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) +- Citation Metadata (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.0 `__, and Dublin Core's `DCMI Metadata Terms `__ . Language field uses `ISO 639-1 `__ controlled vocabulary. +- Geospatial Metadata (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.0 `__, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. +- Social Science & Humanities Metadata__ (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, and Dublin Core. +- Astronomy and Astrophysics Metadata (`see .tsv `__): These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) `VOResource Schema format `__ and is based on `Virtual Observatory (VO) Discovery and Provenance Metadata `__. -- `Life Sciences Metadata `__ (`see .tsv version `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. -- `Journal Metadata `__ (`see .tsv version `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. +- Life Sciences Metadata (`see .tsv `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. +- Journal Metadata (`see .tsv `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. Experimental Metadata ~~~~~~~~~~~~~~~~~~~~~ @@ -37,7 +37,7 @@ Experimental Metadata Unlike supported metadata, experimental metadata is not enabled by default in a new Dataverse installation. Feedback via any `channel `_ is welcome! - `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) -- `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. +- Computational Workflow Metadata (`see .tsv `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. Please note: these custom metadata schemas are not included in the Solr schema for indexing by default, you will need to add them as necessary for your custom metadata blocks. See "Update the Solr Schema" in :doc:`../admin/metadatacustomization`. From 3d50ba86d7db1c46ff1f384960af327e94011e4a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 27 Nov 2024 17:56:02 -0500 Subject: [PATCH 20/20] typo --- doc/sphinx-guides/source/user/appendix.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index 601274a50bd..df9b6704209 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -24,7 +24,7 @@ Detailed below are what metadata schemas we support for Citation and Domain Spec - Citation Metadata (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.0 `__, and Dublin Core's `DCMI Metadata Terms `__ . Language field uses `ISO 639-1 `__ controlled vocabulary. - Geospatial Metadata (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.0 `__, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. -- Social Science & Humanities Metadata__ (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, and Dublin Core. +- Social Science & Humanities Metadata (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, and Dublin Core. - Astronomy and Astrophysics Metadata (`see .tsv `__): These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) `VOResource Schema format `__ and is based on `Virtual Observatory (VO) Discovery and Provenance Metadata `__.