Skip to content

Commit

Permalink
Merge pull request #10945 from IQSS/10888-add-api-for-comparing-datas…
Browse files Browse the repository at this point in the history
…et-versions

Add API endpoint for comparing Dataset Versions
  • Loading branch information
ofahimIQSS authored Nov 22, 2024
2 parents ffcc65c + 558577e commit a3a11a4
Show file tree
Hide file tree
Showing 11 changed files with 484 additions and 16 deletions.
21 changes: 21 additions & 0 deletions doc/release-notes/10888-add-api-for-comparing-dataset-versions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The following API endpoint has been added:

/api/datasets/{persistentId}/versions/{versionId0}/compare/{versionId1}

This API lists the changes between two dataset versions. The JSON response shows the changes per field within each metadata block and within the Terms of Access. Files that have been added or removed are also listed. Files that have been modified are listed with the new file data plus the fields that have been modified.
When the comparison includes an unpublished/draft version, the API token must be associated with a user who has permission to view unpublished versions.
An error will be returned if the first version (versionId0) was created after the second version (versionId1).

Example of Metadata Block field change:
```json
{
"blockName": "Life Sciences Metadata",
"changed": [
{
"fieldName": "Design Type",
"oldValue": "",
"newValue": "Parallel Group Design; Nested Case Control Design"
}
]
}
```
22 changes: 22 additions & 0 deletions doc/sphinx-guides/source/api/native-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1698,6 +1698,28 @@ The fully expanded example above (without environment variables) looks like this
curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/metadata/citation"
Compare Versions of a Dataset
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Returns a list of fields that have changed between two Dataset versions within the Metadata and Terms of Access. Also includes the files that have been added or removed, as well as files that have been modified.
When the comparison includes an unpublished/draft version, the API token must be associated with a user who has permission to view unpublished versions.
An error will be returned if VERSION0 was created after VERSION1.

.. code-block:: bash
export SERVER_URL=https://demo.dataverse.org
export ID=24
export VERSION0=1.0
export VERSION1=:draft
curl "$SERVER_URL/api/datasets/$ID/versions/$VERSION0/compare/$VERSION1"
The fully expanded example above (without environment variables) looks like this:

.. code-block:: bash
curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/compare/:draft"
Update Metadata For a Dataset
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
4 changes: 4 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ public int compare(DatasetVersion o1, DatasetVersion o2) {
}
}
};
/**
 * Builds a JSON description of the differences between two dataset versions.
 *
 * @param originalVersion the version used as the baseline of the comparison
 * @param newVersion the version whose changes relative to the baseline are reported
 * @return the builder produced by {@link DatasetVersionDifference#compareVersionsAsJson()}
 */
public static final JsonObjectBuilder compareVersions(DatasetVersion originalVersion, DatasetVersion newVersion) {
    // DatasetVersionDifference expects the new version first and the original second.
    return new DatasetVersionDifference(newVersion, originalVersion).compareVersionsAsJson();
}

// TODO: Determine the UI implications of various version states
//IMPORTANT: If you add a new value to this enum, you will also have to modify the
Expand Down
162 changes: 153 additions & 9 deletions src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil;
import edu.harvard.iq.dataverse.util.StringUtil;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
Expand All @@ -13,6 +14,10 @@
import java.util.Set;
import java.util.logging.Logger;

import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
import jakarta.json.Json;
import jakarta.json.JsonArrayBuilder;
import jakarta.json.JsonObjectBuilder;
import org.apache.commons.lang3.StringUtils;
import edu.harvard.iq.dataverse.util.BundleUtil;
import java.util.Arrays;
Expand All @@ -37,6 +42,7 @@ public final class DatasetVersionDifference {
private List<FileMetadata> addedFiles = new ArrayList<>();
private List<FileMetadata> removedFiles = new ArrayList<>();
private List<FileMetadata> changedFileMetadata = new ArrayList<>();
private Map<FileMetadata, Map<String,List<String>>> changedFileMetadataDiff = new HashMap<>();
private List<FileMetadata> changedVariableMetadata = new ArrayList<>();
private List<FileMetadata[]> replacedFiles = new ArrayList<>();
private List<String[]> changedTermsAccess = new ArrayList<>();
Expand Down Expand Up @@ -121,9 +127,12 @@ public DatasetVersionDifference(DatasetVersion newVersion, DatasetVersion origin
//If this file was in the original version
if(fmdo!= null) {
//Check for differences
if (!compareFileMetadatas(fmdo, fmdn)) {
Map<String, List<String>> fileMetadataDiff = compareFileMetadatas(fmdo, fmdn);
if (!fileMetadataDiff.isEmpty()) {
changedFileMetadata.add(fmdo);
changedFileMetadata.add(fmdn);
// TODO: find a better key for the map. needs to be something that doesn't change
changedFileMetadataDiff.put(fmdo, fileMetadataDiff);
}
if (!VariableMetadataUtil.compareVariableMetadata(fmdo,fmdn) || !compareVarGroup(fmdo, fmdn)) {
changedVariableMetadata.add(fmdo);
Expand Down Expand Up @@ -347,25 +356,34 @@ static boolean compareVarGroup(FileMetadata fmdo, FileMetadata fmdn) {
}
}

public static boolean compareFileMetadatas(FileMetadata fmdo, FileMetadata fmdn) {

public static Map<String,List<String>> compareFileMetadatas(FileMetadata fmdo, FileMetadata fmdn) {
Map<String,List<String>> fileMetadataChanged = new HashMap<>();
if (!StringUtils.equals(StringUtil.nullToEmpty(fmdo.getDescription()), StringUtil.nullToEmpty(fmdn.getDescription()))) {
return false;
fileMetadataChanged.put("Description",
List.of(StringUtil.nullToEmpty(fmdo.getDescription()), StringUtil.nullToEmpty(fmdn.getDescription())));
}

if (!StringUtils.equals(fmdo.getCategoriesByName().toString(), fmdn.getCategoriesByName().toString())) {
return false;
fileMetadataChanged.put("Categories",
List.of(fmdo.getCategoriesByName().toString(), fmdn.getCategoriesByName().toString()));
}

if (!StringUtils.equals(fmdo.getLabel(), fmdn.getLabel())) {
return false;
fileMetadataChanged.put("Label",
List.of(fmdo.getLabel(), fmdn.getLabel()));
}

if (!StringUtils.equals(fmdo.getProvFreeForm(), fmdn.getProvFreeForm())) {
return false;
fileMetadataChanged.put("ProvFreeForm",
List.of(fmdo.getProvFreeForm(), fmdn.getProvFreeForm()));
}

return fmdo.isRestricted() == fmdn.isRestricted();

if (fmdo.isRestricted() != fmdn.isRestricted()) {
fileMetadataChanged.put("isRestricted",
List.of(String.valueOf(fmdo.isRestricted()), String.valueOf(fmdn.isRestricted())));
}

return fileMetadataChanged;
}

private void compareValues(DatasetField originalField, DatasetField newField, boolean compound) {
Expand Down Expand Up @@ -1623,4 +1641,130 @@ List<FileMetadata> getChangedVariableMetadata() {
/**
 * Returns the replaced-file entries collected by this difference.
 * The backing list itself is returned, not a copy.
 *
 * @return the list of replaced-file arrays
 */
List<FileMetadata[]> getReplacedFiles() {
    return this.replacedFiles;
}
/**
 * Renders this version difference as JSON.
 *
 * The result always contains "oldVersion" and "newVersion" objects (version
 * number and last-updated timestamp). The following members are added only
 * when there is something to report: "metadataChanges", "filesAdded",
 * "filesRemoved", "filesReplaced", "fileChanges" and "TermsOfAccess".
 *
 * @return a builder holding the JSON representation of the differences
 */
public JsonObjectBuilder compareVersionsAsJson() {
    // The pattern appends a literal 'Z' (the UTC designator), so the formatter
    // must be pinned to UTC; otherwise the JVM default time zone would be used
    // and the emitted timestamp would be wrong on non-UTC servers.
    SimpleDateFormat utcTimestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
    utcTimestampFormat.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));

    JsonObjectBuilder job = new NullSafeJsonBuilder();
    JsonObjectBuilder jobVersion = new NullSafeJsonBuilder();
    jobVersion.add("versionNumber", originalVersion.getFriendlyVersionNumber());
    jobVersion.add("lastUpdatedDate", utcTimestampFormat.format(originalVersion.getLastUpdateTime()));
    job.add("oldVersion", jobVersion);
    jobVersion = new NullSafeJsonBuilder();
    jobVersion.add("versionNumber", newVersion.getFriendlyVersionNumber());
    jobVersion.add("lastUpdatedDate", utcTimestampFormat.format(newVersion.getLastUpdateTime()));
    job.add("newVersion", jobVersion);

    // Metadata changes, grouped by metadata block
    if (!this.detailDataByBlock.isEmpty()) {
        JsonArrayBuilder jabMetadata = Json.createArrayBuilder();
        for (List<DatasetField[]> blocks : detailDataByBlock) {
            JsonObjectBuilder jobMetadata = new NullSafeJsonBuilder();
            JsonArrayBuilder jab = Json.createArrayBuilder();
            // The block's display name is taken from the first field pair of the group.
            String blockDisplay = blocks.get(0)[0].getDatasetFieldType().getMetadataBlock().getDisplayName();
            for (DatasetField[] dsfArray : blocks) {
                // Index 0 is reported as the old value, index 1 as the new value.
                JsonObjectBuilder jb = new NullSafeJsonBuilder();
                jb.add("fieldName", dsfArray[0].getDatasetFieldType().getTitle());
                if (dsfArray[0].getDatasetFieldType().isPrimitive()) {
                    jb.add("oldValue", dsfArray[0].getRawValue());
                } else {
                    jb.add("oldValue", dsfArray[0].getCompoundRawValue());
                }
                if (dsfArray[1].getDatasetFieldType().isPrimitive()) {
                    jb.add("newValue", dsfArray[1].getRawValue());
                } else {
                    jb.add("newValue", dsfArray[1].getCompoundRawValue());
                }
                jab.add(jb);
            }
            jobMetadata.add("blockName", blockDisplay);
            jobMetadata.add("changed", jab);
            jabMetadata.add(jobMetadata);
        }
        job.add("metadataChanges", jabMetadata);
    }

    // Format added, removed, and modified files
    if (!addedFiles.isEmpty()) {
        JsonArrayBuilder jab = Json.createArrayBuilder();
        addedFiles.forEach(f -> {
            jab.add(filesDiffJson(f));
        });
        job.add("filesAdded", jab);
    }
    if (!removedFiles.isEmpty()) {
        JsonArrayBuilder jab = Json.createArrayBuilder();
        removedFiles.forEach(f -> {
            jab.add(filesDiffJson(f));
        });
        job.add("filesRemoved", jab);
    }
    if (!replacedFiles.isEmpty()) {
        JsonArrayBuilder jabReplaced = Json.createArrayBuilder();
        replacedFiles.forEach(fm -> {
            // Only entries of length two are rendered, as an {oldFile, newFile} pair.
            if (fm.length == 2) {
                JsonObjectBuilder jobReplaced = new NullSafeJsonBuilder();
                jobReplaced.add("oldFile", filesDiffJson(fm[0]));
                jobReplaced.add("newFile", filesDiffJson(fm[1]));
                jabReplaced.add(jobReplaced);
            }
        });
        job.add("filesReplaced", jabReplaced);
    }
    if (!changedFileMetadata.isEmpty()) {
        JsonArrayBuilder jabDiffFiles = Json.createArrayBuilder();
        changedFileMetadataDiff.entrySet().forEach(entry -> {
            JsonArrayBuilder jab = Json.createArrayBuilder();
            JsonObjectBuilder jobChanged = new NullSafeJsonBuilder();
            jobChanged.add("fileName", entry.getKey().getDataFile().getDisplayName());
            // The checksum is keyed by the name of its checksum type.
            jobChanged.add(entry.getKey().getDataFile().getChecksumType().name(), entry.getKey().getDataFile().getChecksumValue());
            jobChanged.add("fileId", entry.getKey().getDataFile().getId());
            // Each diff value is a two-element list: old value, then new value.
            entry.getValue().entrySet().forEach(e -> {
                JsonObjectBuilder jobDiffField = new NullSafeJsonBuilder();
                jobDiffField.add("fieldName",e.getKey());
                jobDiffField.add("oldValue",e.getValue().get(0));
                jobDiffField.add("newValue",e.getValue().get(1));
                jab.add(jobDiffField);
            });
            jobChanged.add("changed", jab);
            jabDiffFiles.add(jobChanged);
        });
        job.add("fileChanges", jabDiffFiles);
    }

    // Format Terms Of Access changes
    if (!changedTermsAccess.isEmpty()) {
        JsonObjectBuilder jobTOA = new NullSafeJsonBuilder();
        JsonArrayBuilder jab = Json.createArrayBuilder();
        changedTermsAccess.forEach(toa -> {
            // Each entry is read as {field name, old value, new value}.
            JsonObjectBuilder jobValue = new NullSafeJsonBuilder();
            jobValue.add("fieldName",toa[0]);
            jobValue.add("oldValue",toa[1]);
            jobValue.add("newValue",toa[2]);
            jab.add(jobValue);
        });
        jobTOA.add("changed", jab);
        job.add("TermsOfAccess", jobTOA);
    }

    return job;
}
/**
 * Builds the JSON summary of a single file used in the added, removed and
 * replaced file listings.
 *
 * @param fileMetadata the version-specific metadata of the file to describe
 * @return a builder with the file's name, path, checksum, content type, id,
 *         description and restriction flag, plus "categories" and "tags"
 *         arrays when those are non-empty
 */
private JsonObjectBuilder filesDiffJson(FileMetadata fileMetadata) {
    DataFile dataFile = fileMetadata.getDataFile();
    NullSafeJsonBuilder fileJson = new NullSafeJsonBuilder();
    // NOTE(review): isRestricted is read from the DataFile here, whereas
    // compareFileMetadatas reads it from the FileMetadata; confirm which one
    // is intended for a per-version listing.
    fileJson.add("fileName", dataFile.getDisplayName())
            .add("filePath", fileMetadata.getDirectoryLabel())
            .add(dataFile.getChecksumType().name(), dataFile.getChecksumValue())
            .add("type",dataFile.getContentType())
            .add("fileId", dataFile.getId())
            .add("description", fileMetadata.getDescription())
            .add("isRestricted", dataFile.isRestricted());

    boolean hasCategories = fileMetadata.getCategories() != null && !fileMetadata.getCategories().isEmpty();
    if (hasCategories) {
        JsonArrayBuilder categoryNames = Json.createArrayBuilder();
        fileMetadata.getCategories().forEach(category -> categoryNames.add(category.getName()));
        fileJson.add("categories", categoryNames);
    }

    boolean hasTags = dataFile.getTags() != null && !dataFile.getTags().isEmpty();
    if (hasTags) {
        JsonArrayBuilder tagLabels = Json.createArrayBuilder();
        dataFile.getTags().forEach(tag -> tagLabels.add(tag.getTypeLabel()));
        fileJson.add("tags", tagLabels);
    }
    return fileJson;
}
}
20 changes: 20 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
Original file line number Diff line number Diff line change
Expand Up @@ -3017,6 +3017,26 @@ public Response cleanStorage(@Context ContainerRequestContext crc, @PathParam("i

}

/**
 * Lists the differences between two versions of a dataset.
 *
 * Responds with BAD_REQUEST when the first version was created after the
 * second one. Failures while resolving the dataset or either version are
 * returned as the response carried by the thrown WrappedResponse.
 *
 * @param crc the request context used to determine the requesting user
 * @param id identifier of the dataset
 * @param versionId1 identifier of the older version (comparison baseline)
 * @param versionId2 identifier of the newer version
 * @param uriInfo request URI information passed on to the version lookup
 * @param headers request headers passed on to the version lookup
 * @return the JSON difference between the two versions, or an error response
 */
@GET
@AuthRequired
@Path("{id}/versions/{versionId1}/compare/{versionId2}")
public Response getCompareVersions(@Context ContainerRequestContext crc, @PathParam("id") String id,
                                   @PathParam("versionId1") String versionId1,
                                   @PathParam("versionId2") String versionId2,
                                   @Context UriInfo uriInfo, @Context HttpHeaders headers) {
    try {
        DataverseRequest req = createDataverseRequest(getRequestUser(crc));
        // Both versions belong to the same dataset, so resolve it only once.
        Dataset dataset = findDatasetOrDie(id);
        DatasetVersion dsv1 = getDatasetVersionOrDie(req, versionId1, dataset, uriInfo, headers);
        DatasetVersion dsv2 = getDatasetVersionOrDie(req, versionId2, dataset, uriInfo, headers);
        // The older version must be listed first; equal creation times are accepted.
        if (dsv1.getCreateTime().getTime() > dsv2.getCreateTime().getTime()) {
            return error(BAD_REQUEST, BundleUtil.getStringFromBundle("dataset.version.compare.incorrect.order"));
        }
        return ok(DatasetVersion.compareVersions(dsv1, dsv2));
    } catch (WrappedResponse wr) {
        return wr.getResponse();
    }
}

private static Set<String> getDatasetFilenames(Dataset dataset) {
Set<String> files = new HashSet<>();
for (DataFile dataFile: dataset.getFiles()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasetversion.update.failure"), this);
} else {

metadataUpdated = DatasetVersionDifference.compareFileMetadatas(publishedFmd, draftFmd);
metadataUpdated = !DatasetVersionDifference.compareFileMetadatas(publishedFmd, draftFmd).isEmpty();
publishedFmd.setLabel(draftFmd.getLabel());
publishedFmd.setDescription(draftFmd.getDescription());
publishedFmd.setCategories(draftFmd.getCategories());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1007,7 +1007,6 @@ public void parseControlledVocabularyValue(DatasetField dsf, DatasetFieldType cv
if (cvv == null) {
if (allowHarvestingMissingCVV) {
// we need to process this as a primitive value
logger.warning(">>>> Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'. Processing as primitive per setting override.");
parsePrimitiveValue(dsf, cvvType , json);
return;
} else {
Expand Down
1 change: 1 addition & 0 deletions src/main/java/propertyFiles/Bundle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2511,6 +2511,7 @@ dataset.version.file.changed=Files (Changed File Metadata: {0}
dataset.version.file.changed2=; Changed File Metadata: {0}
dataset.version.variablemetadata.changed=Variable Metadata (Changed Variable Metadata: {0}
dataset.version.variablemetadata.changed2=; Changed Variable Metadata: {0}
dataset.version.compare.incorrect.order=Compare requires the older dataset version to be listed first.

#DataversePage.java
dataverse.item.required=Required
Expand Down
Loading

0 comments on commit a3a11a4

Please sign in to comment.