From d338caec0310d1466fc3dbcf83d672af8b9c6e6c Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 9 May 2024 15:57:11 -0400
Subject: [PATCH 01/20] only delete possible orphans

---
 .../iq/dataverse/search/IndexServiceBean.java | 61 +++++++++++++------
 1 file changed, 41 insertions(+), 20 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
index e61b93a741f..6b7a74cdeef 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -486,9 +486,40 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
         String solrIdDeaccessioned = determineDeaccessionedDatasetId(dataset);
         StringBuilder debug = new StringBuilder();
         debug.append("\ndebug:\n");
+        DatasetVersion latestVersion = dataset.getLatestVersion();
+        String latestVersionStateString = latestVersion.getVersionState().name();
+        DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState();
+        DatasetVersion releasedVersion = dataset.getReleasedVersion();
+        boolean atLeastOnePublishedVersion = false;
+        if (releasedVersion != null) {
+            atLeastOnePublishedVersion = true;
+        } else {
+            atLeastOnePublishedVersion = false;
+        }
+        List<String> solrIdsOfFilesToDelete = null;
+
+        try {
+            solrIdsOfFilesToDelete = findFilesOfParentDataset(dataset.getId());
+            List<FileMetadata> fileMetadatas = latestVersion.getFileMetadatas();
+
+            for (FileMetadata fileMetadata : fileMetadatas) {
+                String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId();
+                solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile);
+            }
+            if (releasedVersion != null && !releasedVersion.equals(latestVersion)) {
+                fileMetadatas = releasedVersion.getFileMetadatas();
+                for (FileMetadata fileMetadata : fileMetadatas) {
+                    String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId();
+                    solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile);
+                }
+            }
+        } catch (SearchException | NullPointerException ex) {
+            logger.fine("could not run search of files to delete: " + ex);
+        }
         int numPublishedVersions = 0;
         List<DatasetVersion> versions = dataset.getVersions();
-        List<String> solrIdsOfFilesToDelete = new ArrayList<>();
+        //List<String> solrIdsOfFilesToDelete = new ArrayList<>();
+
+        //Debugging loop
         for (DatasetVersion datasetVersion : versions) {
             Long versionDatabaseId = datasetVersion.getId();
             String versionTitle = datasetVersion.getTitle();
@@ -500,10 +531,10 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
             debug.append("version found with database id " + versionDatabaseId + "\n");
             debug.append("- title: " + versionTitle + "\n");
             debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n");
-            List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
             List<String> fileInfo = new ArrayList<>();
+            List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
+
             for (FileMetadata fileMetadata : fileMetadatas) {
-                String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId();
                 /**
                  * It sounds weird but the first thing we'll do is preemptively
                  * delete the Solr documents of all published files. Don't
                  * worry, published files will be re-indexed later along with
                  * the dataset. We do this so users can delete files from
                  * published versions of datasets and then re-publish a new
                  * version without fear that their old published files (now
                  * deleted from the latest published version) will be
                 * searchable. See also
                 * https://github.com/IQSS/dataverse/issues/762
                 */
-                solrIdsOfFilesToDelete.add(solrIdOfPublishedFile);
                 fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel());
             }
-            try {
+//            try {
                 /**
                  * Preemptively delete *all* Solr documents for files associated
                  * with the dataset based on a Solr query.
                  *
                  * We must query Solr for this information because the file has
                  * been deleted from the database ( perhaps when Solr was down,
                  * as reported in https://github.com/IQSS/dataverse/issues/2086
                  * ) so the database doesn't even know about the file. It's an
                  * orphan.
                  *
                  * @todo This Solr query should make the iteration above based
                  * on the database unnecessary because it the Solr query should
                  * find all files for the dataset. We can probably remove the
                  * iteration above after an "index all" has been performed.
                  * Without an "index all" we won't be able to find files based
                  * on parentId because that field wasn't searchable in 4.0.
                  *
                  * @todo We should also delete the corresponding Solr
                  * "permission" documents for the files.
                  */
-                List<String> allFilesForDataset = findFilesOfParentDataset(dataset.getId());
-                solrIdsOfFilesToDelete.addAll(allFilesForDataset);
-            } catch (SearchException | NullPointerException ex) {
-                logger.fine("could not run search of files to delete: " + ex);
-            }
+                //List<String> allFilesForDataset = findFilesOfParentDataset(dataset.getId());
+                //solrIdsOfFilesToDelete.addAll(allFilesForDataset);
+//            } catch (SearchException | NullPointerException ex) {
+//                logger.fine("could not run search of files to delete: " + ex);
+//            }
             int numFiles = 0;
             if (fileMetadatas != null) {
                 numFiles = fileMetadatas.size();
@@ -555,16 +585,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
             IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete);
             debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n");
         }
-        DatasetVersion latestVersion = dataset.getLatestVersion();
-        String latestVersionStateString = latestVersion.getVersionState().name();
-        DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState();
-        DatasetVersion releasedVersion = dataset.getReleasedVersion();
-        boolean atLeastOnePublishedVersion = false;
-        if (releasedVersion != null) {
-            atLeastOnePublishedVersion = true;
-        } else {
-            atLeastOnePublishedVersion = false;
-        }
+
         Map<DatasetVersion.VersionState, Boolean> desiredCards = new LinkedHashMap<>();
         /**
          * @todo refactor all of this below and have a single method that takes

From 5684020068e15904365b20030d09080ace72dff2 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 9 May 2024 16:28:40 -0400
Subject: [PATCH 02/20] more redundant deletes

---
 .../iq/dataverse/search/IndexServiceBean.java | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
index 6b7a74cdeef..71bd7fd9bb1 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -1935,9 +1935,9 @@ private String removeDeaccessioned(Dataset dataset) {
         StringBuilder result = new StringBuilder();
         String deleteDeaccessionedResult = removeSolrDocFromIndex(determineDeaccessionedDatasetId(dataset));
         result.append(deleteDeaccessionedResult);
-        List<String> docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.DEACCESSIONED);
-        String deleteFilesResult = removeMultipleSolrDocs(docIds);
-        result.append(deleteFilesResult);
+//        List<String> docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.DEACCESSIONED);
+//        String deleteFilesResult = removeMultipleSolrDocs(docIds);
+//        result.append(deleteFilesResult);
         return result.toString();
     }

@@ -1945,9 +1945,9 @@ private String removePublished(Dataset dataset) {
         StringBuilder result = new StringBuilder();
         String deletePublishedResult = removeSolrDocFromIndex(determinePublishedDatasetSolrDocId(dataset));
         result.append(deletePublishedResult);
-        List<String> docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.PUBLISHED);
-        String deleteFilesResult = removeMultipleSolrDocs(docIds);
-        result.append(deleteFilesResult);
+//        List<String> docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.PUBLISHED);
+//        String deleteFilesResult = removeMultipleSolrDocs(docIds);
+//        result.append(deleteFilesResult);
         return result.toString();
     }
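The orphan-detection logic in these first two patches keys entirely off Solr document ID conventions. As a reading aid, here is a minimal sketch of how those IDs compose, assuming the `dataset_`/`datafile_` prefixes and the `_draft`/`_deaccessioned` state suffixes referenced elsewhere in `IndexServiceBean`, with `_permission` marking the companion permission document (the helper class is illustrative only, not part of the patches):

```java
// Illustrative only: the Solr doc ID convention the delete logic relies on.
public class SolrIdConventionSketch {
    static String fileDocId(long fileId, String stateSuffix) {
        return "datafile_" + fileId + stateSuffix;
    }

    public static void main(String[] args) {
        // A published file, its draft variant, and a permission doc:
        System.out.println(fileDocId(42L, ""));                       // datafile_42
        System.out.println(fileDocId(42L, "_draft"));                 // datafile_42_draft
        System.out.println(fileDocId(42L, "_draft") + "_permission"); // datafile_42_draft_permission
    }
}
```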
From 67dc2b06e4274abb7a0c491e4fc59049d450e45b Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Wed, 22 May 2024 15:05:20 -0400
Subject: [PATCH 03/20] add suffix to checks, shuffle logging

---
 .../iq/dataverse/search/IndexServiceBean.java | 119 +++++++++---------
 1 file changed, 62 insertions(+), 57 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
index 71bd7fd9bb1..18d6d77a686 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -37,6 +37,7 @@
 import java.util.concurrent.Future;
 import java.util.concurrent.Semaphore;
 import java.util.function.Function;
+import java.util.logging.Level;
 import java.util.logging.Logger;
 import java.util.stream.Collectors;
 import jakarta.annotation.PostConstruct;
@@ -500,15 +501,18 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
         try {
             solrIdsOfFilesToDelete = findFilesOfParentDataset(dataset.getId());
-            List<FileMetadata> fileMetadatas = latestVersion.getFileMetadatas();
-
-            for (FileMetadata fileMetadata : fileMetadatas) {
-                String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId();
+            logger.fine("Existing file docs: " + String.join(", ", solrIdsOfFilesToDelete));
+            if(latestVersion.isDeaccessioned() && !atLeastOnePublishedVersion) {
+                List<FileMetadata> latestFileMetadatas = latestVersion.getFileMetadatas();
+                String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix();
+                for (FileMetadata fileMetadata : latestFileMetadatas) {
+                    String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + suffix;
                 solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile);
             }
+            }
             if (releasedVersion != null && !releasedVersion.equals(latestVersion)) {
-                fileMetadatas = releasedVersion.getFileMetadatas();
-                for (FileMetadata fileMetadata : fileMetadatas) {
+                List<FileMetadata> releasedFileMetadatas = releasedVersion.getFileMetadatas();
+                for (FileMetadata fileMetadata : releasedFileMetadatas) {
                     String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId();
                     solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile);
                 }
             }
         } catch (SearchException | NullPointerException ex) {
             logger.fine("could not run search of files to delete: " + ex);
         }
+        logger.fine("File docs to delete: " + String.join(", ", solrIdsOfFilesToDelete));
         int numPublishedVersions = 0;
         List<DatasetVersion> versions = dataset.getVersions();
         //List<String> solrIdsOfFilesToDelete = new ArrayList<>();
+        if (logger.isLoggable(Level.FINE)) {
+            for (DatasetVersion datasetVersion : versions) {
+                Long versionDatabaseId = datasetVersion.getId();
+                String versionTitle = datasetVersion.getTitle();
+                String semanticVersion = datasetVersion.getSemanticVersion();
+                DatasetVersion.VersionState versionState = datasetVersion.getVersionState();
+                if (versionState.equals(DatasetVersion.VersionState.RELEASED)) {
+                    numPublishedVersions += 1;
+                }
+                debug.append("version found with database id " + versionDatabaseId + "\n");
+                debug.append("- title: " + versionTitle + "\n");
+                debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n");
+                List<String> fileInfo = new ArrayList<>();
+                List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();

+                for (FileMetadata fileMetadata : fileMetadatas) {
+                    /**
+                     * It sounds weird but the first thing we'll do is preemptively delete the Solr
+                     * documents of all published files. Don't worry, published files will be
+                     * re-indexed later along with the dataset. We do this so users can delete files
+                     * from published versions of datasets and then re-publish a new version without
+                     * fear that their old published files (now deleted from the latest published
+                     * version) will be searchable. See also
+                     * https://github.com/IQSS/dataverse/issues/762
+                     */
+                    fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel());
+                }
// try {
+                /**
+                 * Preemptively delete *all* Solr documents for files associated with the
+                 * dataset based on a Solr query.
+                 *
+                 * We must query Solr for this information because the file has been deleted
+                 * from the database ( perhaps when Solr was down, as reported in
+                 * https://github.com/IQSS/dataverse/issues/2086 ) so the database doesn't even
+                 * know about the file. It's an orphan.
+                 *
+                 * @todo This Solr query should make the iteration above based on the database
+                 *       unnecessary because it the Solr query should find all files for the
+                 *       dataset. We can probably remove the iteration above after an "index
+                 *       all" has been performed. Without an "index all" we won't be able to
+                 *       find files based on parentId because that field wasn't searchable in
+                 *       4.0.
+                 *
+                 * @todo We should also delete the corresponding Solr "permission" documents for
+                 *       the files.
+                 */
                // List allFilesForDataset = findFilesOfParentDataset(dataset.getId());
                // solrIdsOfFilesToDelete.addAll(allFilesForDataset);
// } catch (SearchException | NullPointerException ex) {
//     logger.fine("could not run search of files to delete: " + ex);
// }
+                int numFiles = 0;
+                if (fileMetadatas != null) {
+                    numFiles = fileMetadatas.size();
+                }
+                debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n");
+            }
         }
         debug.append("numPublishedVersions: " + numPublishedVersions + "\n");
         if (doNormalSolrDocCleanUp) {
-            IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete);
-            debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n");
+            if(!solrIdsOfFilesToDelete.isEmpty()) {
+                IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete);
+                debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n");
+            }
         }

         Map<DatasetVersion.VersionState, Boolean> desiredCards = new LinkedHashMap<>();
         /**
          * @todo refactor all of this below and have a single method that takes
From 2ad902c65eccfa8d955996965a135b14cef4cc87 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Wed, 22 May 2024 16:26:43 -0400
Subject: [PATCH 04/20] fix delete logic

---
 .../iq/dataverse/search/IndexServiceBean.java | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
index 18d6d77a686..437287aa755 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -502,13 +502,15 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
         try {
             solrIdsOfFilesToDelete = findFilesOfParentDataset(dataset.getId());
             logger.fine("Existing file docs: " + String.join(", ", solrIdsOfFilesToDelete));
-            if(latestVersion.isDeaccessioned() && !atLeastOnePublishedVersion) {
-                List<FileMetadata> latestFileMetadatas = latestVersion.getFileMetadatas();
-                String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix();
-                for (FileMetadata fileMetadata : latestFileMetadatas) {
-                    String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + suffix;
-                    solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile);
-                }
+            //We keep the latest version's docs unless it is deaccessioned and there is no published/released version
+            //So skip the loop removing those docs from the delete list in that case
+            if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) {
+                List<FileMetadata> latestFileMetadatas = latestVersion.getFileMetadatas();
+                String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix();
+                for (FileMetadata fileMetadata : latestFileMetadatas) {
+                    String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + suffix;
+                    solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile);
+                }
             }
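To make the keep/delete rule in PATCH 04 concrete, here is a small standalone sketch of how the delete-candidate list shrinks; the scenario and IDs are hypothetical (a dataset whose released v1 has files 1 and 2, and whose draft v2 has files 2 and 3), and plain strings stand in for the real Solr doc lookups:

```java
import java.util.ArrayList;
import java.util.List;

// Editorial walk-through of the orphan-filtering rule: start from everything
// Solr holds for the dataset's files, then remove the docs that the latest
// (draft) version and the released version still legitimately need.
public class DeleteListSketch {
    public static void main(String[] args) {
        List<String> deleteCandidates = new ArrayList<>(List.of(
                "datafile_1", "datafile_2",             // released-version docs
                "datafile_2_draft", "datafile_3_draft", // draft-version docs
                "datafile_9_draft"));                   // stale doc; file long gone
        // Latest version is a draft, so keep its docs (suffix "_draft"):
        for (long id : new long[] {2, 3}) {
            deleteCandidates.remove("datafile_" + id + "_draft");
        }
        // A released version also exists, so keep its (unsuffixed) docs:
        for (long id : new long[] {1, 2}) {
            deleteCandidates.remove("datafile_" + id);
        }
        System.out.println(deleteCandidates); // [datafile_9_draft] -- the true orphan
    }
}
```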
Result: ") + // .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); } desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); @@ -822,13 +822,13 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr } } - private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { +/* private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { String deleteDraftFilesResults = ""; IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete); deleteDraftFilesResults = indexResponse.toString(); return deleteDraftFilesResults; } - +*/ private IndexResponse indexDatasetPermissions(Dataset dataset) { boolean disabledForDebugging = false; if (disabledForDebugging) { From 90bfcf4234b56d8aef5f18c7a19df86483c2cb8b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 22 May 2024 16:28:49 -0400 Subject: [PATCH 06/20] don't run though file doc creation if not using it --- .../harvard/iq/dataverse/search/IndexServiceBean.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 10c9b9bbe07..985712da158 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1224,8 +1224,8 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Mon, 3 Jun 2024 10:48:10 -0400 Subject: [PATCH 07/20] add permission doc deletes, check/delete per-version perm docs via api --- .../iq/dataverse/DataFileServiceBean.java | 8 +++ .../iq/dataverse/search/IndexServiceBean.java | 49 ++++++++++++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 41ea6ae39f0..7f38107af6b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; @@ -759,6 +760,13 @@ public List findAll() { return em.createQuery("select object(o) from DataFile as o order by o.id", DataFile.class).getResultList(); } + public List findVersionStates(Long fileId) { + Query query = em.createQuery( + "select distinct dv.versionState from DatasetVersion dv where dv.id in (select fm.datasetVersion.id from FileMetadata fm where fm.dataFile.id=:fileId)"); + query.setParameter("fileId", fileId); + return query.getResultList(); + } + public DataFile save(DataFile dataFile) { if (dataFile.isMergeable()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 985712da158..bab7b196f2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.search; import edu.harvard.iq.dataverse.*; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import 
 import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
 import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean;
 import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
@@ -503,7 +504,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
             solrIdsOfFilesToDelete = findFilesOfParentDataset(dataset.getId());
             logger.fine("Existing file docs: " + String.join(", ", solrIdsOfFilesToDelete));
             //We keep the latest version's docs unless it is deaccessioned and there is no published/released version
-            //So skip the loop removing those docs from the delete list in that case
+            //So skip the loop removing those docs from the delete list except in that case
             if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) {
                 List<FileMetadata> latestFileMetadatas = latestVersion.getFileMetadatas();
                 String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix();
@@ -588,6 +589,10 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
         debug.append("numPublishedVersions: " + numPublishedVersions + "\n");
         if (doNormalSolrDocCleanUp) {
             if(!solrIdsOfFilesToDelete.isEmpty()) {
+                for(String file: solrIdsOfFilesToDelete) {
+                    //Also remove associated permission docs
+                    solrIdsOfFilesToDelete.add(file+"_permission");
+                }
                 IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete);
                 debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n");
             }
@@ -2088,8 +2093,48 @@ public List<String> findPermissionsInSolrOnly() throws SearchException {
                 SolrDocumentList list = rsp.getResults();
                 for (SolrDocument doc: list) {
                     long id = Long.parseLong((String) doc.getFieldValue(SearchFields.DEFINITION_POINT_DVOBJECT_ID));
+                    String docId = (String)doc.getFieldValue(SearchFields.ID);
                     if(!dvObjectService.checkExists(id)) {
-                        permissionInSolrOnly.add((String)doc.getFieldValue(SearchFields.ID));
+                        permissionInSolrOnly.add(docId);
+                    } else {
+                        DvObject obj = dvObjectService.findDvObject(id);
+                        if (obj instanceof Dataset d) {
+                            DatasetVersion dv = d.getLatestVersion();
+                            if (docId.endsWith("draft_permission")) {
+                                if (!dv.isDraft()) {
+                                    permissionInSolrOnly.add(docId);
+                                }
+                            } else if (docId.endsWith("deaccessioned_permission")) {
+                                if (!dv.isDeaccessioned()) {
+                                    permissionInSolrOnly.add(docId);
+                                }
+                            } else {
+                                if (d.getReleasedVersion() != null) {
+                                    permissionInSolrOnly.add(docId);
+                                }
+                            }
+                        } else if (obj instanceof DataFile f) {
+                            List<VersionState> states = dataFileService.findVersionStates(f.getId());
+                            Set<String> strings = states.stream().map(VersionState::toString).collect(Collectors.toSet());
+                            logger.info("for " + docId + " states: " + String.join(", ", strings));
+                            if (docId.endsWith("draft_permission")) {
+                                if (!states.contains(VersionState.DRAFT)) {
+                                    permissionInSolrOnly.add(docId);
+                                }
+                            } else if (docId.endsWith("deaccessioned_permission")) {
+                                if (!states.contains(VersionState.DEACCESSIONED) && states.size() == 1) {
+                                    permissionInSolrOnly.add(docId);
+                                }
+                            } else {
+                                if (!states.contains(VersionState.RELEASED)) {
+                                    permissionInSolrOnly.add(docId);
+                                } else if (!dataFileService.findMostRecentVersionFileIsIn(f).getDatasetVersion()
+                                        .equals(f.getOwner().getReleasedVersion())) {
+                                    permissionInSolrOnly.add(docId);
+                                }
+
+                            }
+                        }
                     }
                 }
                 if (cursorMark.equals(nextCursorMark)) {
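The orphan test that PATCH 07 adds to `findPermissionsInSolrOnly()` is essentially a decision table over the doc ID suffix and the version states a file appears in. A self-contained sketch of that table follows; the helper and enum here are hypothetical stand-ins (the real code uses `DatasetVersion.VersionState` and the entity services), and the released branch is simplified by omitting the most-recent-version check:

```java
import java.util.Set;

// Editorial sketch of the orphan classification in findPermissionsInSolrOnly():
// given the version states a file appears in, decide whether a permission doc
// with the given ID no longer corresponds to any live version.
public class PermissionDocOrphanSketch {
    enum VersionState { DRAFT, RELEASED, DEACCESSIONED }

    static boolean isOrphan(String docId, Set<VersionState> states) {
        if (docId.endsWith("draft_permission")) {
            return !states.contains(VersionState.DRAFT);
        } else if (docId.endsWith("deaccessioned_permission")) {
            // Mirrors the patch's condition: deaccessioned doc with a single,
            // non-deaccessioned state is stale.
            return !states.contains(VersionState.DEACCESSIONED) && states.size() == 1;
        }
        // An unsuffixed permission doc implies a released version must exist.
        return !states.contains(VersionState.RELEASED);
    }

    public static void main(String[] args) {
        // File exists only in a draft version, but a released-state doc lingers:
        System.out.println(isOrphan("datafile_7_permission", Set.of(VersionState.DRAFT)));       // true
        System.out.println(isOrphan("datafile_7_draft_permission", Set.of(VersionState.DRAFT))); // false
    }
}
```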
From da5b10b40ab3b364218c914ca9586eae888ce809 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Wed, 12 Jun 2024 17:29:44 -0400
Subject: [PATCH 08/20] typo - released logic backwards

---
 .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
index d2aa52ce395..98bcdf385fc 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -2099,7 +2099,7 @@ public List<String> findPermissionsInSolrOnly() throws SearchException {
                                     permissionInSolrOnly.add(docId);
                                 }
                             } else {
-                                if (d.getReleasedVersion() != null) {
+                                if (d.getReleasedVersion() == null) {
                                     permissionInSolrOnly.add(docId);
                                 }
                             }

From 04580a3f4403d1a7dbf98e54cae411ff9ff5f9eb Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Wed, 12 Jun 2024 17:30:10 -0400
Subject: [PATCH 09/20] add dataset doc cleanup, fix looping error

---
 .../iq/dataverse/search/IndexServiceBean.java | 51 ++++++++++++++-----
 1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
index 98bcdf385fc..01a6039b91b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -492,14 +492,12 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
         boolean atLeastOnePublishedVersion = false;
         if (releasedVersion != null) {
             atLeastOnePublishedVersion = true;
-        } else {
-            atLeastOnePublishedVersion = false;
         }
-        List<String> solrIdsOfFilesToDelete = null;
+        List<String> solrIdsOfDocsToDelete = null;

         try {
-            solrIdsOfFilesToDelete = findFilesOfParentDataset(dataset.getId());
-            logger.fine("Existing file docs: " + String.join(", ", solrIdsOfFilesToDelete));
+            solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId());
+            logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete));
             //We keep the latest version's docs unless it is deaccessioned and there is no published/released version
             //So skip the loop removing those docs from the delete list except in that case
             if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) {
                 List<FileMetadata> latestFileMetadatas = latestVersion.getFileMetadatas();
                 String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix();
                 for (FileMetadata fileMetadata : latestFileMetadatas) {
                     String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + suffix;
-                    solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile);
+                    solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile);
                 }
             }
             if (releasedVersion != null && !releasedVersion.equals(latestVersion)) {
                 List<FileMetadata> releasedFileMetadatas = releasedVersion.getFileMetadatas();
                 for (FileMetadata fileMetadata : releasedFileMetadatas) {
                     String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId();
-                    solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile);
+                    solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile);
                 }
             }
+            //Clear any unused dataset docs
+            if (!latestVersion.isDraft()) {
+                // The latest version is released, so should delete any draft docs for the
+                // dataset
+                solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId() + draftSuffix);
+            }
+            if (!atLeastOnePublishedVersion) {
+                // There's no released version, so should delete any normal state docs for the
dataset + solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId()); + } + if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) { + // There's a released version or a draft, so should delete any deaccessioned + // state docs for the dataset + solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId() + deaccessionedSuffix); + } } catch (SearchException | NullPointerException ex) { logger.fine("could not run search of files to delete: " + ex); } - logger.fine("File docs to delete: " + String.join(", ", solrIdsOfFilesToDelete)); + logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); int numPublishedVersions = 0; List versions = dataset.getVersions(); //List solrIdsOfFilesToDelete = new ArrayList<>(); @@ -585,12 +599,17 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr } debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); if (doNormalSolrDocCleanUp) { - if(!solrIdsOfFilesToDelete.isEmpty()) { - for(String file: solrIdsOfFilesToDelete) { + + if(!solrIdsOfDocsToDelete.isEmpty()) { + List solrIdsOfPermissionDocsToDelete = new ArrayList<>(); + for(String file: solrIdsOfDocsToDelete) { //Also remove associated permission docs - solrIdsOfFilesToDelete.add(file+"_permission"); + solrIdsOfPermissionDocsToDelete.add(file + discoverabilityPermissionSuffix); } - IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); + solrIdsOfDocsToDelete.addAll(solrIdsOfPermissionDocsToDelete); + logger.fine("Solr docs and perm docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); + + IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfDocsToDelete); debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } } @@ -2118,9 +2137,11 @@ public List findPermissionsInSolrOnly() throws SearchException { } else { if (!states.contains(VersionState.RELEASED)) { permissionInSolrOnly.add(docId); - } else if (!dataFileService.findMostRecentVersionFileIsIn(f).getDatasetVersion() - .equals(f.getOwner().getReleasedVersion())) { - permissionInSolrOnly.add(docId); + } else { + if(dataFileService.findFileMetadataByDatasetVersionIdAndDataFileId(f.getOwner().getReleasedVersion().getId(), f.getId()) == null) { + logger.info("Adding doc " + docId + " to list of permissions in Solr only"); + permissionInSolrOnly.add(docId); + } } } From c16fdd5396fab0f7890d221965a632889046467f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 12 Jun 2024 17:44:48 -0400 Subject: [PATCH 10/20] Fix to version check This is only used in determining the most recent version a dataset is in on the file page, e.g. for https://demo.dataverse.org/file.xhtml ?persistentId=doi:10.70122/FK2/FO0MPQ/KNG6PA&version=3.0 I confirmed that demo shows version 1 in this example whereas it should show version 2 (which this commit fixes). 
---
 .../java/edu/harvard/iq/dataverse/DataFileServiceBean.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
index 7f38107af6b..21f925f8981 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
@@ -384,7 +384,8 @@ public FileMetadata findMostRecentVersionFileIsIn(DataFile file) {
         if (fileMetadatas == null || fileMetadatas.isEmpty()) {
             return null;
         } else {
-            return fileMetadatas.get(0);
+            // This assumes the order of filemetadatas is from first to most recent, which is true as of v6.3
+            return fileMetadatas.get(fileMetadatas.size() - 1);
         }
     }

From 7f56478d2f61161bb797cecdbc52669b67992cbe Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 13 Jun 2024 14:02:22 -0400
Subject: [PATCH 11/20] minor simplification

---
 .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
index 01a6039b91b..bd4aa27ba68 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -486,8 +486,8 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
         StringBuilder debug = new StringBuilder();
         debug.append("\ndebug:\n");
         DatasetVersion latestVersion = dataset.getLatestVersion();
-        String latestVersionStateString = latestVersion.getVersionState().name();
         DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState();
+        String latestVersionStateString = latestVersionState.name();
         DatasetVersion releasedVersion = dataset.getReleasedVersion();
         boolean atLeastOnePublishedVersion = false;
         if (releasedVersion != null) {
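The ordering assumption behind the PATCH 10 fix can be shown in a few lines; this is a hypothetical stand-in (strings in place of the real `FileMetadata` entities) illustrating why the last list element, not the first, is the most recent version:

```java
import java.util.List;

// Sketch of the PATCH 10 fix: fileMetadatas is ordered oldest-first (true as
// of v6.3 per the commit's comment), so the metadata for the most recent
// version a file is in is the LAST element, not the first.
public class MostRecentVersionSketch {
    public static void main(String[] args) {
        List<String> fileMetadatas = List.of("v1.0", "v2.0", "v3.0"); // oldest first
        String buggy = fileMetadatas.get(0);                          // v1.0 (wrong)
        String fixed = fileMetadatas.get(fileMetadatas.size() - 1);   // v3.0 (right)
        System.out.println(buggy + " -> " + fixed);
    }
}
```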
From 57b7ed92428bf24079f9fe969c4e0fedbb2cfa4e Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 13 Jun 2024 14:40:04 -0400
Subject: [PATCH 12/20] cleanup

---
 .../iq/dataverse/search/IndexServiceBean.java | 260 ++++++------------
 1 file changed, 88 insertions(+), 172 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
index bd4aa27ba68..2afb5d26082 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -13,6 +13,7 @@
 import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil;
 import edu.harvard.iq.dataverse.datavariable.VariableServiceBean;
 import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
+import edu.harvard.iq.dataverse.search.IndexableDataset.DatasetState;
 import edu.harvard.iq.dataverse.settings.FeatureFlags;
 import edu.harvard.iq.dataverse.settings.JvmSettings;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
@@ -476,12 +477,8 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
          * @todo should we use solrDocIdentifierDataset or
          * IndexableObject.IndexableTypes.DATASET.getName() + "_" ?
          */
-        // String solrIdPublished = solrDocIdentifierDataset + dataset.getId();
         String solrIdPublished = determinePublishedDatasetSolrDocId(dataset);
         String solrIdDraftDataset = IndexableObject.IndexableTypes.DATASET.getName() + "_" + dataset.getId() + IndexableDataset.DatasetState.WORKING_COPY.getSuffix();
-        // String solrIdDeaccessioned = IndexableObject.IndexableTypes.DATASET.getName()
-        // + "_" + dataset.getId() +
-        // IndexableDataset.DatasetState.DEACCESSIONED.getSuffix();
         String solrIdDeaccessioned = determineDeaccessionedDatasetId(dataset);
         StringBuilder debug = new StringBuilder();
         debug.append("\ndebug:\n");
         DatasetVersion latestVersion = dataset.getLatestVersion();
         DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState();
         String latestVersionStateString = latestVersionState.name();
         DatasetVersion releasedVersion = dataset.getReleasedVersion();
         boolean atLeastOnePublishedVersion = false;
         if (releasedVersion != null) {
             atLeastOnePublishedVersion = true;
         }
         List<String> solrIdsOfDocsToDelete = null;
+        if (logger.isLoggable(Level.FINE)) {
+            writeDebugInfo(debug, dataset);
+        }
+        if (doNormalSolrDocCleanUp) {
+            try {
+                solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId());
+                logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete));
+                // We keep the latest version's docs unless it is deaccessioned and there is no
+                // published/released version
+                // So skip the loop removing those docs from the delete list except in that case
+                if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) {
+                    List<FileMetadata> latestFileMetadatas = latestVersion.getFileMetadatas();
+                    String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix();
+                    for (FileMetadata fileMetadata : latestFileMetadatas) {
+                        String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId()
+                                + suffix;
+                        solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile);
+                    }
+                }
+                if (releasedVersion != null && !releasedVersion.equals(latestVersion)) {
+                    List<FileMetadata> releasedFileMetadatas = releasedVersion.getFileMetadatas();
+                    for (FileMetadata fileMetadata : releasedFileMetadatas) {
+                        String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId();
+                        solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile);
+                    }
+                }
+                // Clear any unused dataset docs
+                if (!latestVersion.isDraft()) {
+                    // The latest version is released, so should delete any draft docs for the
+                    // dataset
+                    solrIdsOfDocsToDelete.add(solrIdDraftDataset);
+                }
+                if (!atLeastOnePublishedVersion) {
+                    // There's no released version, so should delete any normal state docs for the
+                    // dataset
+                    solrIdsOfDocsToDelete.add(solrIdPublished);
+                }
+                if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) {
+                    // There's a released version or a draft, so should delete any deaccessioned
+                    // state docs for the dataset
+                    solrIdsOfDocsToDelete.add(solrIdDeaccessioned);
+                }
+            } catch (SearchException | NullPointerException ex) {
+                logger.fine("could not run search of files to delete: " + ex);
+            }
+            logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete));

+            if(!solrIdsOfDocsToDelete.isEmpty()) {
                 List<String> solrIdsOfPermissionDocsToDelete = new ArrayList<>();
                 for(String file: solrIdsOfDocsToDelete) {
                     //Also remove associated permission docs
                     solrIdsOfPermissionDocsToDelete.add(file + discoverabilityPermissionSuffix);
                 }
                 solrIdsOfDocsToDelete.addAll(solrIdsOfPermissionDocsToDelete);
                 logger.fine("Solr docs and perm docs to delete: " + String.join(", ", solrIdsOfDocsToDelete));

                 IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfDocsToDelete);
                 debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n");
             }
         }

         desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
-        if (doNormalSolrDocCleanUp) {
-            String deleteDeaccessionedResult = removeDeaccessioned(dataset);
-            results.append("Draft exists, no need for deaccessioned version. Deletion attempted for ")
-                    .append(solrIdDeaccessioned).append(" (and files). Result: ")
-                    .append(deleteDeaccessionedResult).append("\n");
-        }
         desiredCards.put(DatasetVersion.VersionState.RELEASED, false);
-        if (doNormalSolrDocCleanUp) {
-            String deletePublishedResults = removePublished(dataset);
-            results.append("No published version. Attempting to delete traces of published version from index. Result: ")
-                    .append(deletePublishedResults).append("\n");
-        }

         /**
          * Desired state for existence of cards: {DRAFT=true,
@@ -687,19 +613,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
             results.append("No draft version. Attempting to index as deaccessioned. Result: ").append(indexDeaccessionedVersionResult).append("\n");

             desiredCards.put(DatasetVersion.VersionState.RELEASED, false);
-            if (doNormalSolrDocCleanUp) {
-                String deletePublishedResults = removePublished(dataset);
-                results.append("No published version. Attempting to delete traces of published version from index. Result: ").append(deletePublishedResults).append("\n");
-            }
             desiredCards.put(DatasetVersion.VersionState.DRAFT, false);
-            if (doNormalSolrDocCleanUp) {
-                //List<String> solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset);
-                String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset);
-                //String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete);
-                //results.append("Attempting to delete traces of drafts. Result: ")
-                //        .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n");
-            }

             /**
              * Desired state for existence of cards: {DEACCESSIONED=true,
@@ -741,20 +655,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
             results.append("Attempted to index " + solrIdPublished).append(". Result: ").append(indexReleasedVersionResult).append("\n");

             desiredCards.put(DatasetVersion.VersionState.DRAFT, false);
-            if (doNormalSolrDocCleanUp) {
-                //List<String> solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset);
-                String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset);
-                //String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete);
-                //results.append("The latest version is published. Attempting to delete drafts. Result: ")
-                //        .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n");
-            }
             desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
-            if (doNormalSolrDocCleanUp) {
-                String deleteDeaccessionedResult = removeDeaccessioned(dataset);
-                results.append("No need for deaccessioned version. Deletion attempted for ")
-                        .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult);
-            }

             /**
              * Desired state for existence of cards: {RELEASED=true,
@@ -801,11 +702,6 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
                     .append(solrIdDraftDataset).append(" (limited visibility). Result: ").append(indexDraftResult).append("\n");

             desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
-            if (doNormalSolrDocCleanUp) {
-                String deleteDeaccessionedResult = removeDeaccessioned(dataset);
-                results.append("No need for deaccessioned version. Deletion attempted for ")
-                        .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult);
-            }

             /**
              * Desired state for existence of cards: {DRAFT=true,
@@ -843,7 +739,45 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr
         }
     }

+    private void writeDebugInfo(StringBuilder debug, Dataset dataset) {
+        List<DatasetVersion> versions = dataset.getVersions();
+        int numPublishedVersions = 0;
+        for (DatasetVersion datasetVersion : versions) {
+            Long versionDatabaseId = datasetVersion.getId();
+            String versionTitle = datasetVersion.getTitle();
+            String semanticVersion = datasetVersion.getSemanticVersion();
+            DatasetVersion.VersionState versionState = datasetVersion.getVersionState();
+            if (versionState.equals(DatasetVersion.VersionState.RELEASED)) {
+                numPublishedVersions += 1;
+            }
+            debug.append("version found with database id " + versionDatabaseId + "\n");
+            debug.append("- title: " + versionTitle + "\n");
+            debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n");
+            List<String> fileInfo = new ArrayList<>();
+            List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
+
+            for (FileMetadata fileMetadata : fileMetadatas) {
+                /**
+                 * It sounds weird but the first thing we'll do is preemptively delete the Solr
+                 * documents of all published files. Don't worry, published files will be
+                 * re-indexed later along with the dataset. We do this so users can delete files
+                 * from published versions of datasets and then re-publish a new version without
+                 * fear that their old published files (now deleted from the latest published
Result: ").append(indexReleasedVersionResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DRAFT, false); - if (doNormalSolrDocCleanUp) { - //List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); - String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); - //String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); - //results.append("The latest version is published. Attempting to delete drafts. Result: ") - // .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); - } - desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - if (doNormalSolrDocCleanUp) { - String deleteDeaccessionedResult = removeDeaccessioned(dataset); - results.append("No need for deaccessioned version. Deletion attempted for ") - .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult); - } /** * Desired state for existence of cards: {RELEASED=true, @@ -801,11 +702,6 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr .append(solrIdDraftDataset).append(" (limited visibility). Result: ").append(indexDraftResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - if (doNormalSolrDocCleanUp) { - String deleteDeaccessionedResult = removeDeaccessioned(dataset); - results.append("No need for deaccessioned version. Deletion attempted for ") - .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult); - } /** * Desired state for existence of cards: {DRAFT=true, @@ -843,7 +739,45 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr } } -/* private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { + private void writeDebugInfo(StringBuilder debug, Dataset dataset) { + List versions = dataset.getVersions(); + int numPublishedVersions = 0; + for (DatasetVersion datasetVersion : versions) { + Long versionDatabaseId = datasetVersion.getId(); + String versionTitle = datasetVersion.getTitle(); + String semanticVersion = datasetVersion.getSemanticVersion(); + DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); + if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { + numPublishedVersions += 1; + } + debug.append("version found with database id " + versionDatabaseId + "\n"); + debug.append("- title: " + versionTitle + "\n"); + debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); + List fileInfo = new ArrayList<>(); + List fileMetadatas = datasetVersion.getFileMetadatas(); + + for (FileMetadata fileMetadata : fileMetadatas) { + /** + * It sounds weird but the first thing we'll do is preemptively delete the Solr + * documents of all published files. Don't worry, published files will be + * re-indexed later along with the dataset. We do this so users can delete files + * from published versions of datasets and then re-publish a new version without + * fear that their old published files (now deleted from the latest published + * version) will be searchable. 
+                 * https://github.com/IQSS/dataverse/issues/762
+                 */
+                fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel());
+            }
+            int numFiles = 0;
+            if (fileMetadatas != null) {
+                numFiles = fileMetadatas.size();
+            }
+            debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n");
+        }
+        debug.append("numPublishedVersions: " + numPublishedVersions + "\n");
+    }
+
-/*    private String deleteDraftFiles(List<String> solrDocIdsForDraftFilesToDelete) {
+    /*    private String deleteDraftFiles(List<String> solrDocIdsForDraftFilesToDelete) {
         String deleteDraftFilesResults = "";
         IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete);
         deleteDraftFilesResults = indexResponse.toString();
         return deleteDraftFilesResults;
     }
 */
@@ -925,15 +859,17 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set
docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.DEACCESSIONED);
-        String deleteFilesResult = removeMultipleSolrDocs(docIds);
-        result.append(deleteFilesResult);
-        return result.toString();
-    }
-
-    private String removePublished(Dataset dataset) {
-        StringBuilder result = new StringBuilder();
-        String deletePublishedResult = removeSolrDocFromIndex(determinePublishedDatasetSolrDocId(dataset));
-        result.append(deletePublishedResult);
-//        List<String> docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.PUBLISHED);
-//        String deleteFilesResult = removeMultipleSolrDocs(docIds);
-//        result.append(deleteFilesResult);
-        return result.toString();
-    }
-
     private Dataverse findRootDataverseCached() {
         if (true) {
             /**
From a52a8384ffebb2244e687aef1cbe479aa06f5b76 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 13 Jun 2024 16:03:33 -0400
Subject: [PATCH 13/20] docs

---
 doc/release-notes/10579-avoid-solr-deletes.md       | 9 +++++++++
 doc/sphinx-guides/source/developers/performance.rst | 1 +
 doc/sphinx-guides/source/installation/config.rst    | 3 +++
 3 files changed, 13 insertions(+)
 create mode 100644 doc/release-notes/10579-avoid-solr-deletes.md

diff --git a/doc/release-notes/10579-avoid-solr-deletes.md b/doc/release-notes/10579-avoid-solr-deletes.md
new file mode 100644
index 00000000000..1062a2fb78f
--- /dev/null
+++ b/doc/release-notes/10579-avoid-solr-deletes.md
@@ -0,0 +1,9 @@
+A feature flag called "reduce-solr-deletes" has been added to improve how datafiles are indexed. When the flag is enabled,
+Dataverse will avoid pre-emptively deleting existing solr documents for the files prior to sending updated information. This
+should improve performance and will allow additional optimizations going forward.
+
+The /api/admin/index/status and /api/admin/index/clear-orphans calls
+(see https://guides.dataverse.org/en/latest/admin/solr-search-index.html#index-and-database-consistency)
+will now find and remove (respectively) additional permissions-related solr documents that were not being detected before.
+Reducing the overall number of documents will improve solr performance and large sites may wish to periodically call the
+clear-orphans API.

diff --git a/doc/sphinx-guides/source/developers/performance.rst b/doc/sphinx-guides/source/developers/performance.rst
index 562fa330d75..0044899a581 100644
--- a/doc/sphinx-guides/source/developers/performance.rst
+++ b/doc/sphinx-guides/source/developers/performance.rst
@@ -121,6 +121,7 @@ While in the past Solr performance hasn't been much of a concern, in recent year
 We are tracking performance problems in `#10469 `_.
 In a meeting with a Solr expert on 2024-05-10 we were advised to avoid joins as much as possible. (It was acknowledged that many Solr users make use of joins because they have to, like we do, to keep some documents private.) Toward that end we have added two feature flags called ``avoid-expensive-solr-join`` and ``add-publicobject-solr-field`` as explained under :ref:`feature-flags`. It was confirmed experimentally that performing the join on all the public objects (published collections, datasets and files), i.e., the bulk of the content in the search index, was indeed very expensive, especially on a large instance the size of the IQSS prod. archive, especially under indexing load. We confirmed that it was in fact unnecessary and were able to replace it with a boolean field directly in the indexed documents, which is achieved by the two feature flags above. However, as of writing this, this mechanism should still be considered experimental.
+Another flag, ``reduce-solr-deletes``, avoids deleting solr documents for files in a dataset prior to sending updates. This is expected to improve indexing performance to some extent and is a step towards avoiding unnecessary updates (i.e. when a doc would not change).

 Datasets with Large Numbers of Files or Versions
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index 8fb9460892b..f6e33a2678d 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -3274,6 +3274,9 @@ please find all known feature flags below. Any of these flags can be activated u
     * - add-publicobject-solr-field
       - Adds an extra boolean field `PublicObject_b:true` for public content (published Collections, Datasets and Files). Once reindexed with these fields, we can rely on it to remove a very expensive Solr join on all such documents in Solr queries, significantly improving overall performance (by enabling the feature flag above, `avoid-expensive-solr-join`). These two flags are separate so that an instance can reindex their holdings before enabling the optimization in searches, thus avoiding having their public objects temporarily disappear from search results while the reindexing is in progress.
       - ``Off``
+    * - reduce-solr-deletes
+      - Avoids deleting and recreating solr documents for dataset files when reindexing.
+      - ``Off``

 **Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development `, you can set them in the `docker compose `_ file.
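As a usage note on the documentation above, enabling the flag and running the orphan cleanup would look roughly like this. This is a sketch only: the environment variable follows the ``DATAVERSE_FEATURE_XXX`` convention quoted in config.rst, the endpoint paths come from the release note, and the host and HTTP method are illustrative assumptions to verify against your deployment:

```bash
# Sketch, following the DATAVERSE_FEATURE_XXX convention from config.rst;
# set before starting Payara:
export DATAVERSE_FEATURE_REDUCE_SOLR_DELETES=1

# Endpoints as named in the release note (localhost is illustrative):
curl http://localhost:8080/api/admin/index/status        # report index/database consistency
curl http://localhost:8080/api/admin/index/clear-orphans # remove orphaned Solr docs
```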
From 1150ff4fa718eb3665eeddad420b33589d9c574d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 13 Jun 2024 16:03:45 -0400 Subject: [PATCH 14/20] feature flag --- .../iq/dataverse/search/IndexServiceBean.java | 269 +++++++++++++----- .../iq/dataverse/settings/FeatureFlags.java | 13 + 2 files changed, 217 insertions(+), 65 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 2afb5d26082..1d1098a33d3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -482,6 +482,78 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr String solrIdDeaccessioned = determineDeaccessionedDatasetId(dataset); StringBuilder debug = new StringBuilder(); debug.append("\ndebug:\n"); + boolean reduceSolrDeletes = FeatureFlags.REDUCE_SOLR_DELETES.enabled(); + if (!reduceSolrDeletes) { + int numPublishedVersions = 0; + List versions = dataset.getVersions(); + List solrIdsOfFilesToDelete = new ArrayList<>(); + for (DatasetVersion datasetVersion : versions) { + Long versionDatabaseId = datasetVersion.getId(); + String versionTitle = datasetVersion.getTitle(); + String semanticVersion = datasetVersion.getSemanticVersion(); + DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); + if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { + numPublishedVersions += 1; + } + debug.append("version found with database id " + versionDatabaseId + "\n"); + debug.append("- title: " + versionTitle + "\n"); + debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); + List fileMetadatas = datasetVersion.getFileMetadatas(); + List fileInfo = new ArrayList<>(); + for (FileMetadata fileMetadata : fileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); + /** + * It sounds weird but the first thing we'll do is preemptively + * delete the Solr documents of all published files. Don't + * worry, published files will be re-indexed later along with + * the dataset. We do this so users can delete files from + * published versions of datasets and then re-publish a new + * version without fear that their old published files (now + * deleted from the latest published version) will be + * searchable. See also + * https://github.com/IQSS/dataverse/issues/762 + */ + solrIdsOfFilesToDelete.add(solrIdOfPublishedFile); + fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); + } + try { + /** + * Preemptively delete *all* Solr documents for files associated + * with the dataset based on a Solr query. + * + * We must query Solr for this information because the file has + * been deleted from the database ( perhaps when Solr was down, + * as reported in https://github.com/IQSS/dataverse/issues/2086 + * ) so the database doesn't even know about the file. It's an + * orphan. + * + * @todo This Solr query should make the iteration above based + * on the database unnecessary because the Solr query should + * find all files for the dataset. We can probably remove the + * iteration above after an "index all" has been performed. + * Without an "index all" we won't be able to find files based + * on parentId because that field wasn't searchable in 4.0. + * + * @todo We should also delete the corresponding Solr + * "permission" documents for the files.
+ */ + List allFilesForDataset = findFilesOfParentDataset(dataset.getId()); + solrIdsOfFilesToDelete.addAll(allFilesForDataset); + } catch (SearchException | NullPointerException ex) { + logger.fine("could not run search of files to delete: " + ex); + } + int numFiles = 0; + if (fileMetadatas != null) { + numFiles = fileMetadatas.size(); + } + debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); + } + debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); + if (doNormalSolrDocCleanUp) { + IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); + debug.append("result of attempt to preemptively delete published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); + } + } DatasetVersion latestVersion = dataset.getLatestVersion(); DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState(); String latestVersionStateString = latestVersionState.name(); DatasetVersion releasedVersion = dataset.getReleasedVersion(); boolean atLeastOnePublishedVersion = false; if (releasedVersion != null) { atLeastOnePublishedVersion = true; } @@ -490,65 +562,69 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr if (releasedVersion != null) { atLeastOnePublishedVersion = true; } - List solrIdsOfDocsToDelete = null; - if (logger.isLoggable(Level.FINE)) { - writeDebugInfo(debug, dataset); - } - if (doNormalSolrDocCleanUp) { - try { - solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); - logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); - // We keep the latest version's docs unless it is deaccessioned and there is no - // published/released version - // So skip the loop removing those docs from the delete list except in that case - if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { - List latestFileMetadatas = latestVersion.getFileMetadatas(); - String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); - for (FileMetadata fileMetadata : latestFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() - + suffix; - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (reduceSolrDeletes) { + List solrIdsOfDocsToDelete = null; + if (logger.isLoggable(Level.FINE)) { + writeDebugInfo(debug, dataset); + } + if (doNormalSolrDocCleanUp) { + try { + solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); + logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); + // We keep the latest version's docs unless it is deaccessioned and there is no + // published/released version + // So skip the loop removing those docs from the delete list except in that case + if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { + List latestFileMetadatas = latestVersion.getFileMetadatas(); + String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); + for (FileMetadata fileMetadata : latestFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + + suffix; + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } - } - if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { - List releasedFileMetadatas = releasedVersion.getFileMetadatas(); - for (FileMetadata fileMetadata : releasedFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { + List
releasedFileMetadatas = releasedVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : releasedFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } + // Clear any unused dataset docs + if (!latestVersion.isDraft()) { + // The latest version is released, so should delete any draft docs for the + // dataset + solrIdsOfDocsToDelete.add(solrIdDraftDataset); + } + if (!atLeastOnePublishedVersion) { + // There's no released version, so should delete any normal state docs for the + // dataset + solrIdsOfDocsToDelete.add(solrIdPublished); + } + if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) { + // There's a released version or a draft, so should delete any deaccessioned + // state docs for the dataset + solrIdsOfDocsToDelete.add(solrIdDeaccessioned); + } + } catch (SearchException | NullPointerException ex) { + logger.fine("could not run search of files to delete: " + ex); } - // Clear any unused dataset docs - if (!latestVersion.isDraft()) { - // The latest version is released, so should delete any draft docs for the - // dataset - solrIdsOfDocsToDelete.add(solrIdDraftDataset); - } - if (!atLeastOnePublishedVersion) { - // There's no released version, so should delete any normal state docs for the - // dataset - solrIdsOfDocsToDelete.add(solrIdPublished); - } - if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) { - // There's a released version or a draft, so should delete any deaccessioned - // state docs for the dataset - solrIdsOfDocsToDelete.add(solrIdDeaccessioned); - } - } catch (SearchException | NullPointerException ex) { - logger.fine("could not run search of files to delete: " + ex); - } - logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); + logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); - if(!solrIdsOfDocsToDelete.isEmpty()) { - List solrIdsOfPermissionDocsToDelete = new ArrayList<>(); - for(String file: solrIdsOfDocsToDelete) { - //Also remove associated permission docs - solrIdsOfPermissionDocsToDelete.add(file + discoverabilityPermissionSuffix); + if (!solrIdsOfDocsToDelete.isEmpty()) { + List solrIdsOfPermissionDocsToDelete = new ArrayList<>(); + for (String file : solrIdsOfDocsToDelete) { + // Also remove associated permission docs + solrIdsOfPermissionDocsToDelete.add(file + discoverabilityPermissionSuffix); + } + solrIdsOfDocsToDelete.addAll(solrIdsOfPermissionDocsToDelete); + logger.fine("Solr docs and perm docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); + + IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService + .deleteMultipleSolrIds(solrIdsOfDocsToDelete); + debug.append("result of attempt to preemptively delete published files before reindexing: " + + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } - solrIdsOfDocsToDelete.addAll(solrIdsOfPermissionDocsToDelete); - logger.fine("Solr docs and perm docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); - - IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfDocsToDelete); - debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } } @@ -574,7 +650,19 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr .append(indexDraftResult).append("\n");
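In short, the reduce-solr-deletes branch above inverts the old strategy: rather than deleting every file doc and reindexing, it starts from the full set of file docs Solr currently holds for the dataset, strips out the ids still referenced by the latest (and, if different, the released) version, adds dataset-level cards that can no longer exist, and deletes only the remainder together with their permission companions. A condensed sketch, with identifiers taken from the hunk above (the permission-doc suffix value is an assumption based on the *_permission ids seen in patch 15):

    List<String> toDelete = findFilesOfParentDataset(dataset.getId()); // every file doc Solr has
    for (FileMetadata fm : latestVersion.getFileMetadatas()) {
        toDelete.remove(solrDocIdentifierFile + fm.getDataFile().getId() + suffix); // keep latest
    }
    if (releasedVersion != null && !releasedVersion.equals(latestVersion)) {
        for (FileMetadata fm : releasedVersion.getFileMetadatas()) {
            toDelete.remove(solrDocIdentifierFile + fm.getDataFile().getId()); // keep released
        }
    }
    // whatever survives is an orphan; remove it along with its permission companion doc
    List<String> permDocs = new ArrayList<>();
    for (String id : toDelete) {
        permDocs.add(id + discoverabilityPermissionSuffix); // presumably "..._permission"
    }
    toDelete.addAll(permDocs);
    solrIndexService.deleteMultipleSolrIds(toDelete);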
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + String deleteDeaccessionedResult = removeDeaccessioned(dataset); + results.append("Draft exists, no need for deaccessioned version. Deletion attempted for ") + .append(solrIdDeaccessioned).append(" (and files). Result: ") + .append(deleteDeaccessionedResult).append("\n"); + } + desiredCards.put(DatasetVersion.VersionState.RELEASED, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + String deletePublishedResults = removePublished(dataset); + results.append("No published version. Attempting to delete traces of published version from index. Result: ") + .append(deletePublishedResults).append("\n"); + } /** * Desired state for existence of cards: {DRAFT=true, @@ -613,7 +701,19 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr results.append("No draft version. Attempting to index as deaccessioned. Result: ").append(indexDeaccessionedVersionResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.RELEASED, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + String deletePublishedResults = removePublished(dataset); + results.append("No published version. Attempting to delete traces of published version from index. Result: ").append(deletePublishedResults).append("\n"); + } + desiredCards.put(DatasetVersion.VersionState.DRAFT, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); + String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); + String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); + results.append("Attempting to delete traces of drafts. Result: ") + .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); + } /** * Desired state for existence of cards: {DEACCESSIONED=true, @@ -655,7 +755,20 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr results.append("Attempted to index " + solrIdPublished).append(". Result: ").append(indexReleasedVersionResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DRAFT, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); + String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); + String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); + results.append("The latest version is published. Attempting to delete drafts. Result: ") + .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); + } + desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + String deleteDeaccessionedResult = removeDeaccessioned(dataset); + results.append("No need for deaccessioned version. Deletion attempted for ") + .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult); + } /** * Desired state for existence of cards: {RELEASED=true, @@ -702,6 +815,11 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr .append(solrIdDraftDataset).append(" (limited visibility). 
Result: ").append(indexDraftResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + String deleteDeaccessionedResult = removeDeaccessioned(dataset); + results.append("No need for deaccessioned version. Deletion attempted for ") + .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult); + } /** * Desired state for existence of cards: {DRAFT=true, @@ -777,13 +895,6 @@ private void writeDebugInfo(StringBuilder debug, Dataset dataset) { debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); } - /* private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { - String deleteDraftFilesResults = ""; - IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete); - deleteDraftFilesResults = indexResponse.toString(); - return deleteDraftFilesResults; - } -*/ private IndexResponse indexDatasetPermissions(Dataset dataset) { boolean disabledForDebugging = false; if (disabledForDebugging) { @@ -866,10 +977,8 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.DEACCESSIONED); + String deleteFilesResult = removeMultipleSolrDocs(docIds); + result.append(deleteFilesResult); + return result.toString(); + } + + //Only used when FeatureFlags.REDUCE_SOLR_DELETES is disabled + private String removePublished(Dataset dataset) { + StringBuilder result = new StringBuilder(); + String deletePublishedResult = removeSolrDocFromIndex(determinePublishedDatasetSolrDocId(dataset)); + result.append(deletePublishedResult); + List docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.PUBLISHED); + String deleteFilesResult = removeMultipleSolrDocs(docIds); + result.append(deleteFilesResult); + return result.toString(); + } + + // Only used when FeatureFlags.REDUCE_SOLR_DELETES is disabled + private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { + String deleteDraftFilesResults = ""; + IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete); + deleteDraftFilesResults = indexResponse.toString(); + return deleteDraftFilesResults; + } + private Dataverse findRootDataverseCached() { if (true) { /** diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 14a7ab86f22..d523bf92e63 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -58,6 +58,19 @@ public enum FeatureFlags { * @since Dataverse 6.3 */ ADD_PUBLICOBJECT_SOLR_FIELD("add-publicobject-solr-field"), + /** + * Dataverse normally deletes all solr documents related to a dataset's files + * when the dataset is reindexed. With this flag enabled, additional logic is + * added to the reindex process to delete only the solr documents that are no + * longer needed. (Required docs will be updated rather than deleted and + * replaced.) Enabling this feature flag should make the reindex process + * faster without impacting the search results. 
+ * + * @apiNote Raise flag by setting + * "dataverse.feature.reduce-solr-deletes" + * @since Dataverse 6.3 + */ + REDUCE_SOLR_DELETES("reduce-solr-deletes"), ; final String flag; From 058c28b21e500455d4dd51c13594b1073e316274 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 13 Jun 2024 16:06:43 -0400 Subject: [PATCH 15/20] info -> fine --- .../edu/harvard/iq/dataverse/search/IndexServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 1d1098a33d3..567ef1ecbd8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -2180,7 +2180,7 @@ public List findPermissionsInSolrOnly() throws SearchException { } else if (obj instanceof DataFile f) { List states = dataFileService.findVersionStates(f.getId()); Set strings = states.stream().map(VersionState::toString).collect(Collectors.toSet()); - logger.info("for " + docId + " states: " + String.join(", ", strings)); + logger.fine("States for " + docId + ": " + String.join(", ", strings)); if (docId.endsWith("draft_permission")) { if (!states.contains(VersionState.DRAFT)) { permissionInSolrOnly.add(docId); @@ -2194,7 +2194,7 @@ public List findPermissionsInSolrOnly() throws SearchException { permissionInSolrOnly.add(docId); } else { if(dataFileService.findFileMetadataByDatasetVersionIdAndDataFileId(f.getOwner().getReleasedVersion().getId(), f.getId()) == null) { - logger.info("Adding doc " + docId + " to list of permissions in Solr only"); + logger.fine("Adding doc " + docId + " to list of permissions in Solr only"); permissionInSolrOnly.add(docId); } } From 1b0d3a1cc20438c3d3236b9cf190cedc5903a82b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 13 Jun 2024 17:27:03 -0400 Subject: [PATCH 16/20] note eliminating more orphan perm docs --- doc/sphinx-guides/source/developers/performance.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/performance.rst b/doc/sphinx-guides/source/developers/performance.rst index 0044899a581..6c864bec257 100644 --- a/doc/sphinx-guides/source/developers/performance.rst +++ b/doc/sphinx-guides/source/developers/performance.rst @@ -121,7 +121,7 @@ While in the past Solr performance hasn't been much of a concern, in recent year We are tracking performance problems in `#10469 `_. In a meeting with a Solr expert on 2024-05-10 we were advised to avoid joins as much as possible. (It was acknowledged that many Solr users make use of joins because they have to, like we do, to keep some documents private.) Toward that end we have added two feature flags called ``avoid-expensive-solr-join`` and ``add-publicobject-solr-field`` as explained under :ref:`feature-flags`. It was confirmed experimentally that performing the join on all the public objects (published collections, datasets and files), i.e., the bulk of the content in the search index, was indeed very expensive, especially on a large instance the size of the IQSS prod. archive, especially under indexing load. We confirmed that it was in fact unnecessary and were able to replace it with a boolean field directly in the indexed documents, which is achieved by the two feature flags above. However, as of writing this, this mechanism should still be considered experimental. 
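Spelling out what the findPermissionsInSolrOnly changes in patch 15 above amount to: a DataFile's permission doc is treated as an orphan when no dataset version in the matching state still carries the file, which is also what the /api/admin/index/clear-orphans call mentioned in the release note cleans up. Condensed from the hunks above (eliding the intermediate branches the diff shows only partially):

    // A draft permission doc with no surviving draft version is an orphan;
    // otherwise the file must still appear in the released version.
    if (docId.endsWith("draft_permission")) {
        if (!states.contains(VersionState.DRAFT)) {
            permissionInSolrOnly.add(docId); // no draft version left for this file
        }
    } else if (dataFileService.findFileMetadataByDatasetVersionIdAndDataFileId(
            f.getOwner().getReleasedVersion().getId(), f.getId()) == null) {
        permissionInSolrOnly.add(docId); // file is gone from the released version
    }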
-Another flag, ``reduce-solr-deletes``, avoids deleting solr documents for files in a dataset prior to sending updates. This is expected to improve indexing performance to some extent and is a step towards avoiding unnecessary updates (i.e. when a doc would not change). +Another flag, ``reduce-solr-deletes``, avoids deleting solr documents for files in a dataset prior to sending updates. It also eliminates several causes of orphan permission documents. This is expected to improve indexing performance to some extent and is a step towards avoiding unnecessary updates (i.e. when a doc would not change). Datasets with Large Numbers of Files or Versions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From e24405cd0f3a5d50e5c46916f84108aca318fa89 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 10:35:45 -0400 Subject: [PATCH 17/20] skip remove loops if list starts empty --- .../iq/dataverse/search/IndexServiceBean.java | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 42b740076ec..63ce35114e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -571,23 +571,26 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr try { solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); - // We keep the latest version's docs unless it is deaccessioned and there is no - // published/released version - // So skip the loop removing those docs from the delete list except in that case - if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { - List latestFileMetadatas = latestVersion.getFileMetadatas(); - String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); - for (FileMetadata fileMetadata : latestFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() - + suffix; - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (!solrIdsOfDocsToDelete.isEmpty()) { + // We keep the latest version's docs unless it is deaccessioned and there is no + // published/released version + // So skip the loop removing those docs from the delete list except in that case + if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { + List latestFileMetadatas = latestVersion.getFileMetadatas(); + String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); + for (FileMetadata fileMetadata : latestFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + + fileMetadata.getDataFile().getId() + suffix; + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } - } - if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { - List releasedFileMetadatas = releasedVersion.getFileMetadatas(); - for (FileMetadata fileMetadata : releasedFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { + List releasedFileMetadatas = releasedVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : releasedFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + + 
fileMetadata.getDataFile().getId(); + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } } // Clear any unused dataset docs From b464b24296a15ffcb137d283f07965a2a2449e4b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 15:06:48 -0400 Subject: [PATCH 18/20] drop COMMIT_WITHIN which breaks autoSoftCommit by maxTime in solrconfig --- .../harvard/iq/dataverse/search/IndexServiceBean.java | 11 +++++------ .../iq/dataverse/search/SolrIndexServiceBean.java | 6 +++--- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 0102459ab9f..10e6c2e6516 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -312,7 +312,7 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) String status; try { if (dataverse.getId() != null) { - solrClientService.getSolrClient().add(docs, COMMIT_WITHIN); + solrClientService.getSolrClient().add(docs); } else { logger.info("WARNING: indexing of a dataverse with no id attempted"); } @@ -345,7 +345,6 @@ public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) { private static final Map INDEXING_NOW = new ConcurrentHashMap<>(); // semaphore for async indexing private static final Semaphore ASYNC_INDEX_SEMAPHORE = new Semaphore(JvmSettings.MAX_ASYNC_INDEXES.lookupOptional(Integer.class).orElse(4), true); - static final int COMMIT_WITHIN = 30000; //Same as current autoHardIndex time @Inject @Metric(name = "index_permit_wait_time", absolute = true, unit = MetricUnits.NANOSECONDS, @@ -1562,7 +1561,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d final SolrInputDocuments docs = toSolrDocs(indexableDataset, datafilesInDraftVersion); try { - solrClientService.getSolrClient().add(docs.getDocuments(), COMMIT_WITHIN); + solrClientService.getSolrClient().add(docs.getDocuments()); } catch (SolrServerException | IOException ex) { if (ex.getCause() instanceof SolrServerException) { throw new SolrServerException(ex); @@ -1814,7 +1813,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc sid.removeField(SearchFields.SUBTREE); sid.addField(SearchFields.SUBTREE, paths); - UpdateResponse addResponse = solrClientService.getSolrClient().add(sid, COMMIT_WITHIN); + UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); if (object.isInstanceofDataset()) { for (DataFile df : dataset.getFiles()) { solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); @@ -1869,7 +1868,7 @@ public String delete(Dataverse doomed) { logger.fine("deleting Solr document for dataverse " + doomed.getId()); UpdateResponse updateResponse; try { - updateResponse = solrClientService.getSolrClient().deleteById(solrDocIdentifierDataverse + doomed.getId(), COMMIT_WITHIN); + updateResponse = solrClientService.getSolrClient().deleteById(solrDocIdentifierDataverse + doomed.getId()); } catch (SolrServerException | IOException ex) { return ex.toString(); } @@ -1889,7 +1888,7 @@ public String removeSolrDocFromIndex(String doomed) { logger.fine("deleting Solr document: " + doomed); UpdateResponse updateResponse; try { - updateResponse = solrClientService.getSolrClient().deleteById(doomed, COMMIT_WITHIN); + updateResponse = solrClientService.getSolrClient().deleteById(doomed); } catch (SolrServerException | 
IOException ex) { return ex.toString(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index 19235bb5a14..cfe29ea08c7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -356,7 +356,7 @@ private void persistToSolr(Collection docs) throws SolrServer /** * @todo Do something with these responses from Solr. */ - UpdateResponse addResponse = solrClientService.getSolrClient().add(docs, IndexServiceBean.COMMIT_WITHIN); + UpdateResponse addResponse = solrClientService.getSolrClient().add(docs); } public IndexResponse indexPermissionsOnSelfAndChildren(long definitionPointId) { @@ -496,7 +496,7 @@ public IndexResponse deleteMultipleSolrIds(List solrIdsToDelete) { return new IndexResponse("nothing to delete"); } try { - solrClientService.getSolrClient().deleteById(solrIdsToDelete, IndexServiceBean.COMMIT_WITHIN); + solrClientService.getSolrClient().deleteById(solrIdsToDelete); } catch (SolrServerException | IOException ex) { /** * @todo mark these for re-deletion @@ -509,7 +509,7 @@ public IndexResponse deleteMultipleSolrIds(List solrIdsToDelete) { public JsonObjectBuilder deleteAllFromSolrAndResetIndexTimes() throws SolrServerException, IOException { JsonObjectBuilder response = Json.createObjectBuilder(); logger.info("attempting to delete all Solr documents before a complete re-index"); - solrClientService.getSolrClient().deleteByQuery("*:*", IndexServiceBean.COMMIT_WITHIN); + solrClientService.getSolrClient().deleteByQuery("*:*"); int numRowsAffected = dvObjectService.clearAllIndexTimes(); response.add(numRowsClearedByClearAllIndexTimes, numRowsAffected); response.add(messageString, "Solr index and database index timestamps cleared."); From 9fc757f9fa37ef43383a0ad628fd137231249de6 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 25 Jun 2024 15:21:58 -0400 Subject: [PATCH 19/20] 10581 request access email fix (#10653) * #10581 fix subject line of request access email * #10581 add sorter * #10581 addl user info for request email * #10581 add break between custom Q responses * #10581 remove orderby --- .../iq/dataverse/GuestbookResponse.java | 40 +++++++++++++++---- .../harvard/iq/dataverse/MailServiceBean.java | 2 +- .../harvard/iq/dataverse/util/MailUtil.java | 5 ++- src/main/java/propertyFiles/Bundle.properties | 6 ++- .../iq/dataverse/util/MailUtilTest.java | 8 +++- 5 files changed, 49 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java index 9041ccf887c..1ea7d02791d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java @@ -17,6 +17,8 @@ import java.util.List; import jakarta.persistence.*; import jakarta.validation.constraints.Size; +import java.util.Collections; +import java.util.Comparator; /** * @@ -178,7 +180,7 @@ public GuestbookResponse(GuestbookResponse source){ this.setSessionId(source.getSessionId()); List customQuestionResponses = new ArrayList<>(); if (!source.getCustomQuestionResponses().isEmpty()){ - for (CustomQuestionResponse customQuestionResponse : source.getCustomQuestionResponses() ){ + for (CustomQuestionResponse customQuestionResponse : source.getCustomQuestionResponsesSorted() ){ CustomQuestionResponse customQuestionResponseAdd = new 
CustomQuestionResponse(); customQuestionResponseAdd.setResponse(customQuestionResponse.getResponse()); customQuestionResponseAdd.setCustomQuestion(customQuestionResponse.getCustomQuestion()); @@ -254,6 +256,18 @@ public String getResponseDate() { public List getCustomQuestionResponses() { return customQuestionResponses; } + + public List getCustomQuestionResponsesSorted(){ + + Collections.sort(customQuestionResponses, (CustomQuestionResponse cqr1, CustomQuestionResponse cqr2) -> { + int a = cqr1.getCustomQuestion().getDisplayOrder(); + int b = cqr2.getCustomQuestion().getDisplayOrder(); + return Integer.valueOf(a).compareTo(b); + }); + + + return customQuestionResponses; + } public void setCustomQuestionResponses(List customQuestionResponses) { this.customQuestionResponses = customQuestionResponses; @@ -317,7 +331,11 @@ public void setSessionId(String sessionId) { this.sessionId= sessionId; } - public String toHtmlFormattedResponse() { + public String toHtmlFormattedResponse(){ + return toHtmlFormattedResponse(null); + } + + public String toHtmlFormattedResponse(AuthenticatedUser requestor) { StringBuilder sb = new StringBuilder(); @@ -326,17 +344,25 @@ public String toHtmlFormattedResponse() { sb.append(BundleUtil.getStringFromBundle("dataset.guestbookResponse.respondent") + "
    \n
  • " + BundleUtil.getStringFromBundle("name") + ": " + getName() + "
  • \n
  • "); sb.append(" " + BundleUtil.getStringFromBundle("email") + ": " + getEmail() + "
  • \n
  • "); - sb.append( - " " + BundleUtil.getStringFromBundle("institution") + ": " + wrapNullAnswer(getInstitution()) + "
  • \n
  • "); - sb.append(" " + BundleUtil.getStringFromBundle("position") + ": " + wrapNullAnswer(getPosition()) + "
\n"); + sb.append(" " + BundleUtil.getStringFromBundle("institution") + ": " + wrapNullAnswer(getInstitution()) + "\n
  • "); + sb.append(" " + BundleUtil.getStringFromBundle("position") + ": " + wrapNullAnswer(getPosition()) + "
  • "); + + //Add requestor information to response to help dataset admin with request processing + if (requestor != null){ + sb.append("\n
  • " + BundleUtil.getStringFromBundle("dataset.guestbookResponse.requestor.id") + ": " + requestor.getId()+ "
  • "); + sb.append("\n
  • " + BundleUtil.getStringFromBundle("dataset.guestbookResponse.requestor.identifier") + ": " + requestor.getIdentifier()+ "
  • \n"); + } else { + sb.append("\n"); + } + sb.append(BundleUtil.getStringFromBundle("dataset.guestbookResponse.guestbook.additionalQuestions") + ":
      \n"); - for (CustomQuestionResponse cqr : getCustomQuestionResponses()) { + for (CustomQuestionResponse cqr : getCustomQuestionResponsesSorted()) { sb.append("
    • " + BundleUtil.getStringFromBundle("dataset.guestbookResponse.question") + ": " + cqr.getCustomQuestion().getQuestionString() + "
      " + BundleUtil.getStringFromBundle("dataset.guestbookResponse.answer") + ": " - + wrapNullAnswer(cqr.getResponse()) + "
    • \n"); + + wrapNullAnswer(cqr.getResponse()) + "\n
      "); } sb.append("
    "); return sb.toString(); diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index 1eee9c65501..7359ef8eb33 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -456,7 +456,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio GuestbookResponse gbr = far.getGuestbookResponse(); if (gbr != null) { messageText += MessageFormat.format( - BundleUtil.getStringFromBundle("notification.email.requestFileAccess.guestbookResponse"), gbr.toHtmlFormattedResponse()); + BundleUtil.getStringFromBundle("notification.email.requestFileAccess.guestbookResponse"), gbr.toHtmlFormattedResponse(requestor)); } return messageText; case GRANTFILEACCESS: diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java index ccec3e5f09b..36c249de834 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java @@ -35,7 +35,10 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti case CREATEDV: return BundleUtil.getStringFromBundle("notification.email.create.dataverse.subject", rootDvNameAsList); case REQUESTFILEACCESS: - return BundleUtil.getStringFromBundle("notification.email.request.file.access.subject", Arrays.asList(rootDvNameAsList.get(0), datasetDisplayName)); + String userNameFirst = userNotification.getRequestor().getFirstName(); + String userNameLast = userNotification.getRequestor().getLastName(); + String userIdentifier = userNotification.getRequestor().getIdentifier(); + return BundleUtil.getStringFromBundle("notification.email.request.file.access.subject", Arrays.asList(rootDvNameAsList.get(0), userNameFirst, userNameLast, userIdentifier, datasetDisplayName)); case REQUESTEDFILEACCESS: return BundleUtil.getStringFromBundle("notification.email.requested.file.access.subject", Arrays.asList(rootDvNameAsList.get(0), datasetDisplayName)); case GRANTFILEACCESS: diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 2996ccb509b..3e19a19d8dc 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -757,8 +757,8 @@ dashboard.card.datamove.dataset.command.error.indexingProblem=Dataset could not notification.email.create.dataverse.subject={0}: Your dataverse has been created notification.email.create.dataset.subject={0}: Dataset "{1}" has been created notification.email.dataset.created.subject={0}: Dataset "{1}" has been created -notification.email.request.file.access.subject={0}: Access has been requested for a restricted file in dataset "{1}" -notification.email.requested.file.access.subject={0}: You have requested access to a restricted file in dataset "{1}" +notification.email.request.file.access.subject={0}: {1} {2} ({3}) requested access to dataset "{4}" +notification.email.requested.file.access.subject={0}: You have requested access to a restricted file in dataset "{1}" notification.email.grant.file.access.subject={0}: You have been granted access to a restricted file notification.email.rejected.file.access.subject={0}: Your request for access to a restricted file has been rejected notification.email.submit.dataset.subject={0}: Dataset "{1}" has been submitted for review @@ -1400,6 +1400,8 @@ 
dataset.guestbookResponse.respondent=Respondent dataset.guestbookResponse.question=Q dataset.guestbookResponse.answer=A dataset.guestbookResponse.noResponse=(No Response) +dataset.guestbookResponse.requestor.id=authenticatedUserId +dataset.guestbookResponse.requestor.identifier=authenticatedUserIdentifier # dataset.xhtml diff --git a/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java index 0756c4650fb..f9236ab8338 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.UserNotification; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -82,7 +83,12 @@ public void testSubjectRevokeRole() { @Test public void testSubjectRequestFileAccess() { userNotification.setType(UserNotification.Type.REQUESTFILEACCESS); - assertEquals("LibraScholar: Access has been requested for a restricted file in dataset \"\"", MailUtil.getSubjectTextBasedOnNotification(userNotification, null)); + AuthenticatedUser requestor = new AuthenticatedUser(); + requestor.setFirstName("Tom"); + requestor.setLastName("Jones"); + requestor.setUserIdentifier("TJ-1234"); + userNotification.setRequestor(requestor); + assertEquals("LibraScholar: Tom Jones (@TJ-1234) requested access to dataset \"\"", MailUtil.getSubjectTextBasedOnNotification(userNotification, null)); } @Test From fc020be4e95c163509a055f452111aadd06eddcb Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 15:37:03 -0400 Subject: [PATCH 20/20] missed use --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 10e6c2e6516..b7b2760e79b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1826,7 +1826,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc } sid.removeField(SearchFields.SUBTREE); sid.addField(SearchFields.SUBTREE, paths); - addResponse = solrClientService.getSolrClient().add(sid, COMMIT_WITHIN); + addResponse = solrClientService.getSolrClient().add(sid); } } }
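A closing note on the COMMIT_WITHIN removals in patches 18 and 20: passing a commitWithin interval on every add or delete effectively overrides the server-side soft-commit schedule, which is what the patch 18 commit message means by breaking autoSoftCommit by maxTime in solrconfig. Dropping the argument defers visibility entirely to the autoCommit/autoSoftCommit settings in solrconfig.xml. A before/after sketch, using the 30000 ms value the removed constant carried:

    // Before: each update asked Solr to commit within 30 s, preempting the
    // autoSoftCommit maxTime configured in solrconfig.xml.
    solrClientService.getSolrClient().add(docs, 30000 /* was IndexServiceBean.COMMIT_WITHIN */);

    // After: the plain one-argument call; commit timing is governed by the
    // server-side solrconfig.xml settings alone.
    solrClientService.getSolrClient().add(docs);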