From 5fa12a97e16c8946d859d99b2de7842f41ada01c Mon Sep 17 00:00:00 2001 From: Jan van Mansum Date: Tue, 19 Nov 2024 12:40:14 +0100 Subject: [PATCH 1/2] Merged 10890 --- .../10814-Differencing improvement.md | 3 + .../iq/dataverse/DatasetFieldServiceBean.java | 14 +- .../edu/harvard/iq/dataverse/DatasetPage.java | 10 +- .../iq/dataverse/DatasetServiceBean.java | 1 + .../dataverse/DatasetVersionDifference.java | 438 +++++------------- .../command/impl/AbstractDatasetCommand.java | 103 +++- .../impl/UpdateDatasetVersionCommand.java | 203 +++++--- .../iq/dataverse/settings/FeatureFlags.java | 8 + .../impl/CreateDatasetVersionCommandTest.java | 29 +- .../util/cache/CacheFactoryBeanTest.java | 1 + 10 files changed, 401 insertions(+), 409 deletions(-) create mode 100644 doc/release-notes/10814-Differencing improvement.md diff --git a/doc/release-notes/10814-Differencing improvement.md b/doc/release-notes/10814-Differencing improvement.md new file mode 100644 index 00000000000..49bbdae3e1b --- /dev/null +++ b/doc/release-notes/10814-Differencing improvement.md @@ -0,0 +1,3 @@ +### More Scalable Dataset Version Differencing + +Differencing between dataset versions, which is done during dataset edit operations and to populate the dataset page versions table has been made signficantly more scalable. diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index ff78b0c83ec..8d074ae1257 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -87,7 +87,7 @@ public class DatasetFieldServiceBean implements java.io.Serializable { //Flat list of cvoc term-uri and managed fields by Id Set cvocFieldSet = null; - + //The hash of the existing CVocConf setting. Used to determine when the setting has changed and it needs to be re-parsed to recreate the cvocMaps String oldHash = null; @@ -276,9 +276,11 @@ public ControlledVocabAlternate save(ControlledVocabAlternate alt) { * @return - a map of JsonObjects containing configuration information keyed by the DatasetFieldType id (Long) */ public Map getCVocConf(boolean byTermUriField){ - + return getCVocConf(byTermUriField, settingsService.getValueForKey(SettingsServiceBean.Key.CVocConf)); + } + + public Map getCVocConf(boolean byTermUriField, String cvocSetting) { //ToDo - change to an API call to be able to provide feedback if the json is invalid? - String cvocSetting = settingsService.getValueForKey(SettingsServiceBean.Key.CVocConf); if (cvocSetting == null || cvocSetting.isEmpty()) { oldHash=null; //Release old maps @@ -295,7 +297,7 @@ public Map getCVocConf(boolean byTermUriField){ cvocMap=new HashMap<>(); cvocMapByTermUri=new HashMap<>(); cvocFieldSet = new HashSet<>(); - + try (JsonReader jsonReader = Json.createReader(new StringReader(settingsService.getValueForKey(SettingsServiceBean.Key.CVocConf)))) { JsonArray cvocConfJsonArray = jsonReader.readArray(); for (JsonObject jo : cvocConfJsonArray.getValuesAs(JsonObject.class)) { @@ -356,11 +358,11 @@ public Set getCvocFieldSet() { /** * Adds information about the external vocabulary term being used in this DatasetField to the ExternalVocabularyValue table if it doesn't already exist. * @param df - the primitive/parent compound field containing a newly saved value + * @param cvocEntry */ - public void registerExternalVocabValues(DatasetField df) { + public void registerExternalVocabValues(DatasetField df, JsonObject cvocEntry) { DatasetFieldType dft = df.getDatasetFieldType(); logger.fine("Registering for field: " + dft.getName()); - JsonObject cvocEntry = getCVocConf(true).get(dft.getId()); if (dft.isPrimitive()) { List siblingsDatasetFields = new ArrayList<>(); if(dft.getParentDatasetFieldType()!=null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index eae4a9f2977..254198b959c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -156,6 +156,7 @@ import edu.harvard.iq.dataverse.search.SearchServiceBean; import edu.harvard.iq.dataverse.search.SearchUtil; import edu.harvard.iq.dataverse.search.SolrClientService; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SignpostingResources; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -1104,7 +1105,7 @@ public Set getFileIdsInVersionFromSolr(Long datasetVersionId, String patte while (iter.hasNext()) { SolrDocument solrDocument = iter.next(); Long entityid = (Long) solrDocument.getFieldValue(SearchFields.ENTITY_ID); - logger.fine("solr result id: "+entityid); + logger.finest("solr result id: "+entityid); resultIds.add(entityid); } @@ -2695,7 +2696,9 @@ public void edit(EditMode editMode) { dataset = datasetService.find(dataset.getId()); } workingVersion = dataset.getOrCreateEditVersion(); - clone = workingVersion.cloneDatasetVersion(); + if(!FeatureFlags.DISABLE_EDIT_DRAFT_LOGGING.enabled()) { + clone = workingVersion.cloneDatasetVersion(); + } if (editMode.equals(EditMode.METADATA)) { datasetVersionUI = datasetVersionUI.initDatasetVersionUI(workingVersion, true); updateDatasetFieldInputLevels(); @@ -3950,7 +3953,10 @@ public String save() { cmd = new UpdateDatasetVersionCommand(dataset, dvRequestService.getDataverseRequest(), filesToBeDeleted, clone ); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); } + long start = System.currentTimeMillis(); + dataset = commandEngine.submit(cmd); + logger.fine("Save done in: " + (System.currentTimeMillis()-start) + " ms"); if (editMode == EditMode.CREATE) { if (session.getUser() instanceof AuthenticatedUser) { userNotificationService.sendNotification((AuthenticatedUser) session.getUser(), dataset.getCreateDate(), UserNotification.Type.CREATEDS, dataset.getLatestVersion().getId()); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index dab0ff43fcf..d41eba0a979 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -442,6 +442,7 @@ public DatasetLock addDatasetLock(Long datasetId, DatasetLock.Reason reason, Lon // (to prevent multiple, duplicate locks on the dataset!) DatasetLock lock = dataset.getLockFor(reason); if (lock != null) { + logger.warning("There was a lock!"); return lock; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java index eca0c84ae84..d5fd8767f5e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java @@ -8,6 +8,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.logging.Logger; @@ -15,6 +17,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.Arrays; import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; @@ -40,8 +43,6 @@ public final class DatasetVersionDifference { private List summaryDataForNote = new ArrayList<>(); private List blockDataForNote = new ArrayList<>(); - private VariableMetadataUtil variableMetadataUtil; - private List differenceSummaryGroups = new ArrayList<>(); public List getDifferenceSummaryGroups() { @@ -106,50 +107,67 @@ public DatasetVersionDifference(DatasetVersion newVersion, DatasetVersion origin addToSummary(null, dsfn); } } - - // TODO: ? - // It looks like we are going through the filemetadatas in both versions, - // *sequentially* (i.e. at the cost of O(N*M)), to select the lists of - // changed, deleted and added files between the 2 versions... But why - // are we doing it, if we are doing virtually the same thing inside - // the initDatasetFilesDifferenceList(), below - but in a more efficient - // way (sorting both lists, then goint through them in parallel, at the - // cost of (N+M) max.? - // -- 4.6 Nov. 2016 - + long startTime = System.currentTimeMillis(); + Map originalFileMetadataMap = new HashMap<>(originalVersion.getFileMetadatas().size()); + Map previousIDtoFileMetadataMap = new HashMap<>(); for (FileMetadata fmdo : originalVersion.getFileMetadatas()) { - boolean deleted = true; - for (FileMetadata fmdn : newVersion.getFileMetadatas()) { - if (fmdo.getDataFile().equals(fmdn.getDataFile())) { - deleted = false; - if (!compareFileMetadatas(fmdo, fmdn)) { - changedFileMetadata.add(fmdo); - changedFileMetadata.add(fmdn); - } - if (!variableMetadataUtil.compareVariableMetadata(fmdo,fmdn) || !compareVarGroup(fmdo, fmdn)) { - changedVariableMetadata.add(fmdo); - changedVariableMetadata.add(fmdn); - } - break; - } - } - if (deleted) { - removedFiles.add(fmdo); - } + originalFileMetadataMap.put(fmdo.getDataFile().getId(), fmdo); } + for (FileMetadata fmdn : newVersion.getFileMetadatas()) { - boolean added = true; - for (FileMetadata fmdo : originalVersion.getFileMetadatas()) { - if (fmdo.getDataFile().equals(fmdn.getDataFile())) { - added = false; - break; + DataFile ndf = fmdn.getDataFile(); + Long id = ndf.getId(); + FileMetadata fmdo = originalFileMetadataMap.get(id); + //If this file was in the original version + if(fmdo!= null) { + //Check for differences + if (!compareFileMetadatas(fmdo, fmdn)) { + changedFileMetadata.add(fmdo); + changedFileMetadata.add(fmdn); + } + if (!VariableMetadataUtil.compareVariableMetadata(fmdo,fmdn) || !compareVarGroup(fmdo, fmdn)) { + changedVariableMetadata.add(fmdo); + changedVariableMetadata.add(fmdn); + } + // And drop it from the list since it can't be a deleted file + originalFileMetadataMap.remove(id); + } else { + //It wasn't in the original version + Long prevID = ndf.getPreviousDataFileId(); + //It might be a replacement file or an added file + if(prevID != null) { + //Add it to a map so we can check later to see if it's a replacement + previousIDtoFileMetadataMap.put(prevID, fmdn); + } else { + //Otherwise make it an added file now + addedFiles.add(fmdn); } } - if (added) { - addedFiles.add(fmdn); + } + //Finally check any remaining files from the original version that weren't in the new version' + for (Long removedId : originalFileMetadataMap.keySet()) { + //See if it has been replaced + FileMetadata replacingFmd = previousIDtoFileMetadataMap.get(removedId); + FileMetadata fmdRemoved = originalFileMetadataMap.get(removedId); + if (replacingFmd != null) { + //This is a replacement + replacedFiles.add(new FileMetadata[] { fmdRemoved, replacingFmd }); + //Drop if from the map + previousIDtoFileMetadataMap.remove(removedId); + } else { + //This is a removed file + removedFiles.add(fmdRemoved); } - } - getReplacedFiles(); + } + // Any fms left are not updating existing files and aren't replacing a file, but + // they are claiming a previous file id. That shouldn't be possible, but this will + // make sure they get listed in the difference if they do + for (Entry entry : previousIDtoFileMetadataMap.entrySet()) { + logger.warning("Previous file id claimed for a new file: fmd id: " + entry.getValue() + ", previous file id: " + entry.getKey()); + addedFiles.add(entry.getValue()); + } + + logger.fine("Main difference loop execution time: " + (System.currentTimeMillis() - startTime) + " ms"); initDatasetFilesDifferencesList(); //Sort within blocks by datasetfieldtype dispaly order then.... @@ -173,294 +191,62 @@ public DatasetVersionDifference(DatasetVersion newVersion, DatasetVersion origin getTermsDifferences(); } - private void getReplacedFiles() { - if (addedFiles.isEmpty() || removedFiles.isEmpty()) { - return; - } - List addedToReplaced = new ArrayList<>(); - List removedToReplaced = new ArrayList<>(); - for (FileMetadata added : addedFiles) { - DataFile addedDF = added.getDataFile(); - Long replacedId = addedDF.getPreviousDataFileId(); - if (added.getDataFile().getPreviousDataFileId() != null){ - } - for (FileMetadata removed : removedFiles) { - DataFile test = removed.getDataFile(); - if (test.getId().equals(replacedId)) { - addedToReplaced.add(added); - removedToReplaced.add(removed); - FileMetadata[] replacedArray = new FileMetadata[2]; - replacedArray[0] = removed; - replacedArray[1] = added; - replacedFiles.add(replacedArray); - } - } - } - if(addedToReplaced.isEmpty()){ - } else{ - addedToReplaced.stream().forEach((delete) -> { - addedFiles.remove(delete); - }); - removedToReplaced.stream().forEach((delete) -> { - removedFiles.remove(delete); - }); - } - } + private void getTermsDifferences() { - changedTermsAccess = new ArrayList<>(); - if (newVersion.getTermsOfUseAndAccess() != null && originalVersion.getTermsOfUseAndAccess() != null) { - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getTermsOfUse()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getTermsOfUse()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.header"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getTermsOfUse()), StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getTermsOfUse())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getConfidentialityDeclaration()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getConfidentialityDeclaration()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.declaration"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getConfidentialityDeclaration()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getConfidentialityDeclaration())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getSpecialPermissions()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getSpecialPermissions()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.permissions"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getSpecialPermissions()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getSpecialPermissions())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getRestrictions()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getRestrictions()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.restrictions"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getRestrictions()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getRestrictions())); - - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getCitationRequirements()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getCitationRequirements()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.citationRequirements"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getCitationRequirements()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getCitationRequirements())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDepositorRequirements()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDepositorRequirements()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.depositorRequirements"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDepositorRequirements()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDepositorRequirements())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getConditions()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getConditions()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.conditions"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getConditions()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getConditions())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDisclaimer()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDisclaimer()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.disclaimer"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDisclaimer()), StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDisclaimer())); - } - - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getTermsOfAccess()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getTermsOfAccess()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.termsOfsAccess"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getTermsOfAccess()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getTermsOfAccess())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDataAccessPlace()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDataAccessPlace()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.dataAccessPlace"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDataAccessPlace()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDataAccessPlace())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getOriginalArchive()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getOriginalArchive()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.originalArchive"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getOriginalArchive()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getOriginalArchive())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getAvailabilityStatus()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getAvailabilityStatus()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.availabilityStatus"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getAvailabilityStatus()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getAvailabilityStatus())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getContactForAccess()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getContactForAccess()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.contactForAccess"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getContactForAccess()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getContactForAccess())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getSizeOfCollection()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getSizeOfCollection()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.sizeOfCollection"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getSizeOfCollection()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getSizeOfCollection())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getStudyCompletion()).equals(StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getStudyCompletion()))) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.studyCompletion"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getStudyCompletion()), - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getStudyCompletion())); - } + TermsOfUseAndAccess originalTerms = originalVersion.getTermsOfUseAndAccess(); + if(originalTerms == null) { + originalTerms = new TermsOfUseAndAccess(); + } + // newTerms should never be null + TermsOfUseAndAccess newTerms = newVersion.getTermsOfUseAndAccess(); + if(newTerms == null) { + logger.warning("New version does not have TermsOfUseAndAccess"); + newTerms = new TermsOfUseAndAccess(); } - - if (newVersion.getTermsOfUseAndAccess() != null && originalVersion.getTermsOfUseAndAccess() == null) { - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getTermsOfUse()).isEmpty()) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.header"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getTermsOfUse())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getConfidentialityDeclaration()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.declaration"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getConfidentialityDeclaration())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getSpecialPermissions()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.permissions"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getSpecialPermissions())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getRestrictions()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.restrictions"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getRestrictions())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getCitationRequirements()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.citationRequirements"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getCitationRequirements())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDepositorRequirements()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.depositorRequirements"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDepositorRequirements())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getConditions()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.conditions"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getConditions())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDisclaimer()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.disclaimer"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDisclaimer())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getTermsOfAccess()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.termsOfsAccess"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getTermsOfAccess())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDataAccessPlace()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.dataAccessPlace"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getDataAccessPlace())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getOriginalArchive()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.originalArchive"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getOriginalArchive())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getAvailabilityStatus()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.availabilityStatus"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getAvailabilityStatus())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getContactForAccess()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.contactForAccess"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getContactForAccess())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getSizeOfCollection()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.sizeOfCollection"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getSizeOfCollection())); - } - if (!StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getStudyCompletion()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.studyCompletion"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, "", - StringUtil.nullToEmpty(newVersion.getTermsOfUseAndAccess().getStudyCompletion())); - } - } - - if (newVersion.getTermsOfUseAndAccess() == null && originalVersion.getTermsOfUseAndAccess() != null) { - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getTermsOfUse()).isEmpty()) { - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.header"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getTermsOfUse()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getConfidentialityDeclaration()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.declaration"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getConfidentialityDeclaration()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getSpecialPermissions()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.permissions"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getSpecialPermissions()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getRestrictions()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.restrictions"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getRestrictions()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getCitationRequirements()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.citationRequirements"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getCitationRequirements()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDepositorRequirements()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.depositorRequirements"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDepositorRequirements()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getConditions()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.conditions"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getConditions()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDisclaimer()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.disclaimer"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDisclaimer()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getTermsOfAccess()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.termsOfsAccess"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getTermsOfAccess()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDataAccessPlace()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.dataAccessPlace"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getDataAccessPlace()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getOriginalArchive()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.originalArchive"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getOriginalArchive()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getAvailabilityStatus()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.availabilityStatus"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getAvailabilityStatus()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getContactForAccess()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.contactForAccess"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getContactForAccess()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getSizeOfCollection()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.sizeOfCollection"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getSizeOfCollection()), ""); - } - if (!StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getStudyCompletion()).isEmpty()){ - String diffLabel = BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.studyCompletion"); - changedTermsAccess = addToTermsChangedList(changedTermsAccess, diffLabel, - StringUtil.nullToEmpty(originalVersion.getTermsOfUseAndAccess().getStudyCompletion()), ""); - } - } - } - - private DifferenceSummaryItem createSummaryItem(){ - return null; - } - - private List addToSummaryGroup(String displayName, DifferenceSummaryItem differenceSummaryItem){ - return null; + checkAndAddToChangeList(originalTerms.getTermsOfUse(), newTerms.getTermsOfUse(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.header")); + checkAndAddToChangeList(originalTerms.getConfidentialityDeclaration(), newTerms.getConfidentialityDeclaration(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.declaration")); + checkAndAddToChangeList(originalTerms.getSpecialPermissions(), newTerms.getSpecialPermissions(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.permissions")); + checkAndAddToChangeList(originalTerms.getRestrictions(), newTerms.getRestrictions(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.restrictions")); + checkAndAddToChangeList(originalTerms.getCitationRequirements(), newTerms.getCitationRequirements(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.citationRequirements")); + checkAndAddToChangeList(originalTerms.getDepositorRequirements(), newTerms.getDepositorRequirements(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.depositorRequirements")); + checkAndAddToChangeList(originalTerms.getConditions(), newTerms.getConditions(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.conditions")); + checkAndAddToChangeList(originalTerms.getDisclaimer(), newTerms.getDisclaimer(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfUse.addInfo.disclaimer")); + checkAndAddToChangeList(originalTerms.getTermsOfAccess(), newTerms.getTermsOfAccess(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.termsOfsAccess")); + checkAndAddToChangeList(originalTerms.getDataAccessPlace(), newTerms.getDataAccessPlace(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.dataAccessPlace")); + checkAndAddToChangeList(originalTerms.getOriginalArchive(), newTerms.getOriginalArchive(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.originalArchive")); + checkAndAddToChangeList(originalTerms.getAvailabilityStatus(), newTerms.getAvailabilityStatus(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.availabilityStatus")); + checkAndAddToChangeList(originalTerms.getContactForAccess(), newTerms.getContactForAccess(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.contactForAccess")); + checkAndAddToChangeList(originalTerms.getSizeOfCollection(), newTerms.getSizeOfCollection(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.sizeOfCollection")); + checkAndAddToChangeList(originalTerms.getStudyCompletion(), newTerms.getStudyCompletion(), + BundleUtil.getStringFromBundle("file.dataFilesTab.terms.list.termsOfAccess.addInfo.studyCompletion")); } - - private List addToTermsChangedList(List listIn, String label, String origVal, String newVal) { - String[] diffArray; - diffArray = new String[3]; - diffArray[0] = label; - diffArray[1] = origVal; - diffArray[2] = newVal; - listIn.add(diffArray); - return listIn; + + private void checkAndAddToChangeList(String originalTerm, String newTerm, + String termLabel) { + originalTerm = StringUtil.nullToEmpty(originalTerm); + newTerm = StringUtil.nullToEmpty(newTerm); + if(!originalTerm.equals(newTerm)) { + changedTermsAccess.add(new String[]{termLabel, originalTerm, newTerm}); + } } - private void addToList(List listIn, DatasetField dsfo, DatasetField dsfn) { DatasetField[] dsfArray; dsfArray = new DatasetField[2]; @@ -523,7 +309,7 @@ private void addToNoteSummary(DatasetField dsfo, int added, int deleted, int cha summaryDataForNote.add(noteArray); } - private boolean compareVarGroup(FileMetadata fmdo, FileMetadata fmdn) { + static boolean compareVarGroup(FileMetadata fmdo, FileMetadata fmdn) { List vglo = fmdo.getVarGroups(); List vgln = fmdn.getVarGroups(); @@ -533,7 +319,7 @@ private boolean compareVarGroup(FileMetadata fmdo, FileMetadata fmdn) { int count = 0; for (VarGroup vgo : vglo) { for (VarGroup vgn : vgln) { - if (!variableMetadataUtil.checkDiff(vgo.getLabel(), vgn.getLabel())) { + if (!VariableMetadataUtil.checkDiff(vgo.getLabel(), vgn.getLabel())) { Set dvo = vgo.getVarsInGroup(); Set dvn = vgn.getVarsInGroup(); if (dvo.equals(dvn)) { @@ -1819,4 +1605,12 @@ private static boolean fieldsAreDifferent(DatasetField originalField, DatasetFie } return false; } + + List getgetChangedVariableMetadata() { + return changedVariableMetadata; + } + + List getReplacedFiles() { + return replacedFiles; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java index 1a1f4f9318b..23c734f5cc9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; import edu.harvard.iq.dataverse.DatasetFieldServiceBean; @@ -18,18 +19,25 @@ import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeDOIProvider; import edu.harvard.iq.dataverse.util.BundleUtil; import java.sql.Timestamp; +import java.util.Arrays; import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Map; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import static java.util.stream.Collectors.joining; import jakarta.ejb.EJB; +import jakarta.json.JsonObject; import jakarta.validation.ConstraintViolation; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; /** * @@ -217,8 +225,88 @@ protected Timestamp getTimestamp() { return timestamp; } - protected void checkSystemMetadataKeyIfNeeded(DatasetVersion newVersion, DatasetVersion persistedVersion) throws IllegalCommandException { - Set changedMDBs = DatasetVersionDifference.getBlocksWithChanges(newVersion, persistedVersion); + protected void registerFilePidsIfNeeded(Dataset theDataset, CommandContext ctxt, boolean b) throws CommandException { + // Register file PIDs if needed + PidProvider pidGenerator = ctxt.dvObjects().getEffectivePidGenerator(getDataset()); + boolean shouldRegister = !pidGenerator.registerWhenPublished() && + ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner()) && + pidGenerator.canCreatePidsLike(getDataset().getGlobalId()); + if (shouldRegister) { + for (DataFile dataFile : theDataset.getFiles()) { + logger.fine(dataFile.getId() + " is registered?: " + dataFile.isIdentifierRegistered()); + if (!dataFile.isIdentifierRegistered()) { + // pre-register a persistent id + registerFileExternalIdentifier(dataFile, pidGenerator, ctxt, true); + } + } + } + } + + private void registerFileExternalIdentifier(DataFile dataFile, PidProvider pidProvider, CommandContext ctxt, boolean retry) throws CommandException { + + if (!dataFile.isIdentifierRegistered()) { + + if (pidProvider instanceof FakeDOIProvider) { + retry = false; // No reason to allow a retry with the FakeProvider (even if it allows + // pre-registration someday), so set false for efficiency + } + try { + if (pidProvider.alreadyRegistered(dataFile)) { + int attempts = 0; + if (retry) { + do { + pidProvider.generatePid(dataFile); + logger.log(Level.INFO, "Attempting to register external identifier for datafile {0} (trying: {1}).", + new Object[] { dataFile.getId(), dataFile.getIdentifier() }); + attempts++; + } while (pidProvider.alreadyRegistered(dataFile) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT); + } + if (!retry) { + logger.warning("Reserving File PID for: " + getDataset().getId() + ", fileId: " + dataFile.getId() + ", during publication failed."); + throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.filePidNotReserved", Arrays.asList(getDataset().getIdentifier())), this); + } + if (attempts > FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT) { + // Didn't work - we existed the loop with too many tries + throw new CommandExecutionException("This dataset may not be published because its identifier is already in use by another dataset; " + + "gave up after " + attempts + " attempts. Current (last requested) identifier: " + dataFile.getIdentifier(), this); + } + } + // Invariant: DataFile identifier does not exist in the remote registry + try { + pidProvider.createIdentifier(dataFile); + dataFile.setGlobalIdCreateTime(getTimestamp()); + dataFile.setIdentifierRegistered(true); + } catch (Throwable ex) { + logger.info("Call to globalIdServiceBean.createIdentifier failed: " + ex); + } + + } catch (Throwable e) { + if (e instanceof CommandException) { + throw (CommandException) e; + } + throw new CommandException(BundleUtil.getStringFromBundle("file.register.error", pidProvider.getProviderInformation()), this); + } + } else { + throw new IllegalCommandException("This datafile may not have a PID because its id registry service is not supported.", this); + } + + } + + + void checkSystemMetadataKeyIfNeeded(DatasetVersion newVersion, DatasetVersion persistedVersion) throws IllegalCommandException { + checkSystemMetadataKeyIfNeeded(DatasetVersionDifference.getBlocksWithChanges(newVersion, persistedVersion)); + } + + protected void checkSystemMetadataKeyIfNeeded(DatasetVersionDifference dvDifference) throws IllegalCommandException { + List> changeListsByBlock = dvDifference.getDetailDataByBlock(); + Set changedMDBs = new HashSet<>(); + for (List changeList : changeListsByBlock) { + changedMDBs.add(changeList.get(0)[0].getDatasetFieldType().getMetadataBlock()); + } + checkSystemMetadataKeyIfNeeded(changedMDBs); + } + + private void checkSystemMetadataKeyIfNeeded(Set changedMDBs) throws IllegalCommandException { for (MetadataBlock mdb : changedMDBs) { logger.fine(mdb.getName() + " has been changed"); String smdbString = JvmSettings.MDB_SYSTEM_KEY_FOR.lookupOptional(mdb.getName()) @@ -235,10 +323,15 @@ protected void checkSystemMetadataKeyIfNeeded(DatasetVersion newVersion, Dataset } protected void registerExternalVocabValuesIfAny(CommandContext ctxt, DatasetVersion newVersion) { + registerExternalVocabValuesIfAny(ctxt, newVersion, ctxt.settings().getValueForKey(SettingsServiceBean.Key.CVocConf)); + } + protected void registerExternalVocabValuesIfAny(CommandContext ctxt, DatasetVersion newVersion, String cvocSetting) { + Map cvocConf = ctxt.dsField().getCVocConf(true, cvocSetting); for (DatasetField df : newVersion.getFlatDatasetFields()) { - logger.fine("Found id: " + df.getDatasetFieldType().getId()); - if (ctxt.dsField().getCVocConf(true).containsKey(df.getDatasetFieldType().getId())) { - ctxt.dsField().registerExternalVocabValues(df); + long typeId = df.getDatasetFieldType().getId(); + if (cvocConf.containsKey(typeId)) { + ctxt.dsField().registerExternalVocabValues(df, cvocConf.get(typeId)); + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 994f4c7dfb6..878f905e088 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileCategory; +import edu.harvard.iq.dataverse.DataFileTag; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetLock; import edu.harvard.iq.dataverse.DatasetVersion; @@ -14,16 +15,16 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.DatasetFieldUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import jakarta.validation.ConstraintViolationException; - /** * * @author skraffmiller @@ -37,6 +38,8 @@ public class UpdateDatasetVersionCommand extends AbstractDatasetCommand private final DatasetVersion clone; final FileMetadata fmVarMet; + private String cvocSetting=null; + public UpdateDatasetVersionCommand(Dataset theDataset, DataverseRequest aRequest) { super(aRequest, theDataset); this.filesToDelete = new ArrayList<>(); @@ -100,67 +103,113 @@ public Dataset execute(CommandContext ctxt) throws CommandException { if ( ! (getUser() instanceof AuthenticatedUser) ) { throw new IllegalCommandException("Only authenticated users can update datasets", this); } - - Dataset theDataset = getDataset(); - ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); - Dataset savedDataset = null; - - DatasetVersion persistedVersion = clone; - /* - * Unless a pre-change clone has been provided, we need to get it from the db. - * There are two cases: We're updating an existing draft, which has an id, and - * exists in the database We've created a new draft, with null id, and we need - * to get the lastest version in the db - * - */ - if(persistedVersion==null) { - Long id = getDataset().getLatestVersion().getId(); - persistedVersion = ctxt.datasetVersion().find(id!=null ? id: getDataset().getLatestVersionForCopy().getId()); - } - - //Will throw an IllegalCommandException if a system metadatablock is changed and the appropriate key is not supplied. - checkSystemMetadataKeyIfNeeded(getDataset().getOrCreateEditVersion(fmVarMet), persistedVersion); - - getDataset().getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); + long startTime = System.currentTimeMillis(); + logger.fine("Executing UpdateDatasetVersionCommand at: " + startTime); - registerExternalVocabValuesIfAny(ctxt, getDataset().getOrCreateEditVersion(fmVarMet)); + cvocSetting = ctxt.settings().getValueForKey(SettingsServiceBean.Key.CVocConf); + Dataset theDataset = getDataset(); + for(DataFile f:theDataset.getFiles()) { + f.getLatestFileMetadata(); + List dftList = f.getTags(); + if (dftList != null) { + for (DataFileTag dft : f.getTags()) { + logger.info("Found tag: " + dft.getTypeLabel() + " on " + f.getId()); + if(dft.getId()==null) { + ctxt.em().persist(dft); + } + } + } + } + theDataset.getLatestVersion().getFileMetadatas(); + //logger.info("Dataset fmd " + theDataset.getFiles().get(0).getLatestFileMetadata().getId() + " is restricted: " + theDataset.getFiles().get(0).getLatestFileMetadata().isRestricted()); + //logger.info("Dataset latest version fmd " + theDataset.getLatestVersion().getFileMetadatas().get(0).getId() + " is restricted: " + theDataset.getLatestVersion().getFileMetadatas().get(0).isRestricted()); + //Check for an existing lock + ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); try { + logger.info("Getting lock"); // Invariant: Dataset has no locks preventing the update String lockInfoMessage = "saving current edits"; DatasetLock lock = ctxt.datasets().addDatasetLock(getDataset().getId(), DatasetLock.Reason.EditInProgress, ((AuthenticatedUser) getUser()).getId(), lockInfoMessage); if (lock != null) { + lock = ctxt.em().merge(lock); theDataset.addLock(lock); } else { logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", getDataset().getId()); } + + DatasetVersion persistedVersion = clone; + /* + * Unless a pre-change clone has been provided, we need to get it from the db. + * There are two cases: We're updating an existing draft, which has an id, and + * exists in the database We've created a new draft, with null id, and we need + * to get the lastest version in the db + * + */ + DatasetVersion latestVersion = theDataset.getLatestVersion(); + logger.info("lates Version num: " + latestVersion.getVersion()); + if (persistedVersion == null) { + Long id = latestVersion.getId(); + persistedVersion = ctxt.datasetVersion() + .find(id != null ? id : getDataset().getLatestVersionForCopy().getId()); + } + // Get or create (currently only when called with fmVarMet != null) a new edit + // version + DatasetVersion editVersion = theDataset.getOrCreateEditVersion(fmVarMet); + //logger.info("Dataset orig edit version fmd " + editVersion.getFileMetadatas().get(0).getId() + " is restricted: " + editVersion.getFileMetadatas().get(0).isRestricted()); + + + // Now merge the dataset + theDataset = ctxt.em().merge(theDataset); + setDataset(theDataset); + logger.info("Dataset merge done at: " + (System.currentTimeMillis() - startTime) + " ms"); + //Lookup of merged version + if (!ctxt.em().contains(editVersion)) { + logger.info("Orig Edit Version not merged"); + } + editVersion = theDataset.getOrCreateEditVersion(fmVarMet); + if (!latestVersion.isWorkingCopy()) { + logger.info("Edit Version had to be created"); + if (!ctxt.em().contains(editVersion)) { + logger.info("Edit Version had to be merged"); + editVersion = ctxt.em().merge(editVersion); + } + } - getDataset().getOrCreateEditVersion(fmVarMet).setDatasetFields(getDataset().getOrCreateEditVersion(fmVarMet).initDatasetFields()); - validateOrDie(getDataset().getOrCreateEditVersion(fmVarMet), isValidateLenient()); + List metadatas = new ArrayList(editVersion.getFileMetadatas()); + boolean changed = false; + for (FileMetadata fmd : editVersion.getFileMetadatas()) { + if (!ctxt.em().contains(fmd)) { + logger.info("FMD " + fmd.getLabel() + " was not merged " + fmd.getId()); + metadatas.remove(fmd); + fmd = ctxt.em().merge(fmd); + metadatas.add(fmd); + changed = true; + } + } + if(changed) { + editVersion.setFileMetadatas(metadatas); + } - final DatasetVersion editVersion = getDataset().getOrCreateEditVersion(fmVarMet); + // Will throw an IllegalCommandException if a system metadatablock is changed + // and the appropriate key is not supplied. + checkSystemMetadataKeyIfNeeded(editVersion, persistedVersion); + + editVersion.setLastUpdateTime(getTimestamp()); + + editVersion.setDatasetFields(editVersion.initDatasetFields()); + validateOrDie(editVersion, isValidateLenient()); DatasetFieldUtil.tidyUpFields(editVersion.getDatasetFields(), true); - // Merge the new version into out JPA context, if needed. - if (editVersion.getId() == null || editVersion.getId() == 0L) { - ctxt.em().persist(editVersion); - } else { - try { - ctxt.em().merge(editVersion); - } catch (ConstraintViolationException e) { - logger.log(Level.SEVERE,"Exception: "); - e.getConstraintViolations().forEach(err->logger.log(Level.SEVERE,err.toString())); - throw e; - } - } + registerExternalVocabValuesIfAny(ctxt, editVersion, cvocSetting); + //Set creator and create date for files if needed for (DataFile dataFile : theDataset.getFiles()) { if (dataFile.getCreateDate() == null) { dataFile.setCreateDate(getTimestamp()); dataFile.setCreator((AuthenticatedUser) getUser()); } - dataFile.setModificationTime(getTimestamp()); } // Remove / delete any files that were removed @@ -187,14 +236,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { recalculateUNF = true; } } - // we have to merge to update the database but not flush because - // we don't want to create two draft versions! - // Although not completely tested, it looks like this merge handles the - // thumbnail case - if the filemetadata is removed from the context below and - // the dataset still references it, that could cause an issue. Merging here - // avoids any reference from it being the dataset thumbnail - theDataset = ctxt.em().merge(theDataset); - /* * This code has to handle many cases, and anyone making changes should * carefully check tests and basic methods that update the dataset version. The @@ -209,7 +250,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { * the fmd.getId() is null, which just removes the first element found. */ for (FileMetadata fmd : filesToDelete) { - logger.fine("Deleting fmd: " + fmd.getId() + " for file: " + fmd.getDataFile().getId()); // if file is draft (ie. new to this version), delete it. Otherwise just remove // filemetadata object) // There are a few cases to handle: @@ -225,14 +265,14 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // If the datasetversion doesn't match, we have the fmd from a published version // and we need to remove the one for the newly created draft instead, so we find // it here - logger.fine("Edit ver: " + theDataset.getOrCreateEditVersion().getId()); - logger.fine("fmd ver: " + fmd.getDatasetVersion().getId()); - if (!theDataset.getOrCreateEditVersion().equals(fmd.getDatasetVersion())) { - fmd = FileMetadataUtil.getFmdForFileInEditVersion(fmd, theDataset.getOrCreateEditVersion()); + if (!editVersion.equals(fmd.getDatasetVersion())) { + fmd = FileMetadataUtil.getFmdForFileInEditVersion(fmd, editVersion); } - } - fmd = ctxt.em().merge(fmd); - + } + if(!ctxt.em().contains(fmd)) { + logger.info("FMD wasn't merged"); + fmd = ctxt.em().merge(fmd); + } // There are two datafile cases as well - the file has been released, so we're // just removing it from the current draft version or it is only in the draft // version and we completely remove the file. @@ -250,42 +290,63 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // In either case, to fully remove the fmd, we have to remove any other possible // references // From the datasetversion - FileMetadataUtil.removeFileMetadataFromList(theDataset.getOrCreateEditVersion().getFileMetadatas(), fmd); + FileMetadataUtil.removeFileMetadataFromList(editVersion.getFileMetadatas(), fmd); // and from the list associated with each category for (DataFileCategory cat : theDataset.getCategories()) { FileMetadataUtil.removeFileMetadataFromList(cat.getFileMetadatas(), fmd); } } - for(FileMetadata fmd: theDataset.getOrCreateEditVersion().getFileMetadatas()) { - logger.fine("FMD: " + fmd.getId() + " for file: " + fmd.getDataFile().getId() + "is in final draft version"); + + if (logger.isLoggable(Level.FINE)) { + for (FileMetadata fmd : editVersion.getFileMetadatas()) { + logger.fine("FMD: " + fmd.getId() + " for file: " + fmd.getDataFile().getId() + + "is in final draft version"); + } } + + registerFilePidsIfNeeded(theDataset, ctxt, true); if (recalculateUNF) { - ctxt.ingest().recalculateDatasetVersionUNF(theDataset.getOrCreateEditVersion()); + ctxt.ingest().recalculateDatasetVersionUNF(editVersion); } theDataset.setModificationTime(getTimestamp()); - savedDataset = ctxt.em().merge(theDataset); - ctxt.em().flush(); + //Update the DatasetUser (which merges it into the context) updateDatasetUser(ctxt); if (clone != null) { - DatasetVersionDifference dvd = new DatasetVersionDifference(editVersion, clone); AuthenticatedUser au = (AuthenticatedUser) getUser(); - ctxt.datasetVersion().writeEditVersionLog(dvd, au); + DatasetVersionDifference dvDifference = new DatasetVersionDifference(editVersion, clone); + ctxt.datasetVersion().writeEditVersionLog(dvDifference, au); + logger.fine("log done at: " + (System.currentTimeMillis()-startTime)); + } + if ( theDataset != null ) { + final Dataset savedDataset=theDataset; + logger.info("Locks found: " + theDataset.getLocks().size()); + new HashSet<>(savedDataset.getLocks()).stream() + .filter( l -> l.getReason() == DatasetLock.Reason.EditInProgress ) + .forEach( existingLock -> { + existingLock = ctxt.em().merge(existingLock); + savedDataset.removeLock(existingLock); + + AuthenticatedUser user = existingLock.getUser(); + user.getDatasetLocks().remove(existingLock); + + ctxt.em().remove(existingLock); + }); } + logger.info("Done with changes at " + (System.currentTimeMillis()-startTime)); } finally { // We're done making changes - remove the lock... - //Failures above may occur before savedDataset is set, in which case we need to remove the lock on theDataset instead - if(savedDataset!=null) { - ctxt.datasets().removeDatasetLocks(savedDataset, DatasetLock.Reason.EditInProgress); - } else { + //Only happens if an exception has caused us to miss the lock removal in this transaction + if(!theDataset.getLocks().isEmpty()) { ctxt.datasets().removeDatasetLocks(theDataset, DatasetLock.Reason.EditInProgress); + } else { + logger.fine("No locks to remove"); } } - - return savedDataset; + return theDataset; } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 2bfda69247a..93b252e4304 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -101,6 +101,14 @@ public enum FeatureFlags { * @since Dataverse 6.4 */ DISABLE_DATASET_THUMBNAIL_AUTOSELECT("disable-dataset-thumbnail-autoselect"), + /** + * Feature flag for the new Globus upload framework. + */ + GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK("globus-use-experimental-async-framework"), + /** + * Disable the edit-draft logging of all changes made to a draft dataset + */ + DISABLE_EDIT_DRAFT_LOGGING("disable-edit-draft-logging"), ; final String flag; diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommandTest.java index a2d9cdfb917..9df6f442622 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommandTest.java @@ -1,11 +1,14 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.mocks.MocksFactory; +import jakarta.json.JsonObject; + import static edu.harvard.iq.dataverse.mocks.MocksFactory.*; import edu.harvard.iq.dataverse.engine.TestCommandContext; import edu.harvard.iq.dataverse.engine.TestDataverseEngine; @@ -52,9 +55,18 @@ public void testSimpleVersionAddition() throws Exception { CreateDatasetVersionCommand sut = new CreateDatasetVersionCommand( makeRequest(), ds, dsvNew ); final MockDatasetServiceBean serviceBean = new MockDatasetServiceBean(); - TestDataverseEngine testEngine = new TestDataverseEngine( new TestCommandContext(){ - @Override public DatasetServiceBean datasets() { return serviceBean; } - } ); + final MockDatasetFieldServiceBean dsfServiceBean = new MockDatasetFieldServiceBean(); + TestDataverseEngine testEngine = new TestDataverseEngine(new TestCommandContext() { + @Override + public DatasetServiceBean datasets() { + return serviceBean; + } + + @Override + public DatasetFieldServiceBean dsField() { + return dsfServiceBean; + } + }); testEngine.submit(sut); @@ -106,4 +118,15 @@ public DatasetVersion storeVersion(DatasetVersion dsv) { } + static class MockDatasetFieldServiceBean extends DatasetFieldServiceBean { + + boolean storeVersionCalled = false; + + @Override + public Map getCVocConf(boolean b, String s) { + return new HashMap<>(); + } + + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBeanTest.java index 89f04e0cd5a..461094b3657 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBeanTest.java @@ -137,6 +137,7 @@ public void testGuestUserGettingRateLimited() { } @Test + @ResourceLock(value = "cache") public void testAdminUserExemptFromGettingRateLimited() { Command action = new ListExplicitGroupsCommand(null,null); authUser.setSuperuser(true); From eab4122f245b7d91d331385d673c24873bb669e7 Mon Sep 17 00:00:00 2001 From: Jan van Mansum Date: Tue, 19 Nov 2024 16:21:29 +0100 Subject: [PATCH 2/2] Fix db constraint violation --- .../engine/command/impl/UpdateDatasetVersionCommand.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 878f905e088..0826cd7c4c2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -202,16 +202,17 @@ public Dataset execute(CommandContext ctxt) throws CommandException { DatasetFieldUtil.tidyUpFields(editVersion.getDatasetFields(), true); - registerExternalVocabValuesIfAny(ctxt, editVersion, cvocSetting); - //Set creator and create date for files if needed for (DataFile dataFile : theDataset.getFiles()) { if (dataFile.getCreateDate() == null) { dataFile.setCreateDate(getTimestamp()); dataFile.setCreator((AuthenticatedUser) getUser()); + ctxt.em().merge(dataFile); } } + registerExternalVocabValuesIfAny(ctxt, editVersion, cvocSetting); + // Remove / delete any files that were removed // If any of the files that we are deleting has a UNF, we will need to