From 72ded5c7d7a2c0a7b7673cd671f3f5031ed0d3fc Mon Sep 17 00:00:00 2001 From: Eva Roddeck Date: Wed, 17 Jul 2024 09:58:27 +0200 Subject: [PATCH 1/2] added whitespace trimming to dataset-field loading + null check for header #10688 --- .../dataverse/api/DatasetFieldServiceApi.java | 12 +-- .../api/DatasetFieldServiceApiTest.java | 83 +++++++++++++++++++ src/test/resources/tsv/whitespace-test.tsv | 10 +++ 3 files changed, 100 insertions(+), 5 deletions(-) create mode 100644 src/test/resources/tsv/whitespace-test.tsv diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java index 01c51dc2b4c..907295ad848 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java @@ -126,7 +126,7 @@ public Response getByName(@PathParam("name") String name) { String solrFieldSearchable = dsf.getSolrField().getNameSearchable(); String solrFieldFacetable = dsf.getSolrField().getNameFacetable(); String metadataBlock = dsf.getMetadataBlock().getName(); - String uri=dsf.getUri(); + String uri = dsf.getUri(); boolean hasParent = dsf.isHasParent(); boolean allowsMultiples = dsf.isAllowMultiples(); boolean isRequired = dsf.isRequired(); @@ -243,7 +243,9 @@ public Response loadDatasetFields(File file) { br = new BufferedReader(new FileReader("/" + file)); while ((line = br.readLine()) != null) { lineNumber++; - values = line.split(splitBy); + values = Arrays.stream(line.split(splitBy)) + .map(String::trim) + .toArray(String[]::new); if (values[0].startsWith("#")) { // Header row switch (values[0]) { case "#metadataBlock": @@ -326,7 +328,7 @@ public Response loadDatasetFields(File file) { */ public String getGeneralErrorMessage(HeaderType header, int lineNumber, String message) { List arguments = new ArrayList<>(); - arguments.add(header.name()); + arguments.add(header != null ? header.name() : "unknown"); arguments.add(String.valueOf(lineNumber)); arguments.add(message); return BundleUtil.getStringFromBundle("api.admin.datasetfield.load.GeneralErrorMessage", arguments); @@ -334,9 +336,9 @@ public String getGeneralErrorMessage(HeaderType header, int lineNumber, String m /** * Turn ArrayIndexOutOfBoundsException into an informative error message - * @param lineNumber * @param header - * @param e + * @param lineNumber + * @param wrongIndex * @return */ public String getArrayIndexOutOfBoundMessage(HeaderType header, diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApiTest.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApiTest.java index ca99960f240..5f00d34b276 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApiTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApiTest.java @@ -1,15 +1,61 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.ControlledVocabularyValueServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.MetadataBlockServiceBean; +import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonReader; +import jakarta.ws.rs.core.Response; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import java.io.File; +import java.io.StringReader; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; +@ExtendWith(MockitoExtension.class) public class DatasetFieldServiceApiTest { + @Mock + private ActionLogServiceBean actionLogSvc; + + @Mock + private MetadataBlockServiceBean metadataBlockService; + + @Mock + private DataverseServiceBean dataverseService; + + @Mock + private DatasetFieldServiceBean datasetFieldService; + + @Mock + private ControlledVocabularyValueServiceBean controlledVocabularyValueService; + + private DatasetFieldServiceApi api; + + @BeforeEach + public void setup(){ + api = new DatasetFieldServiceApi(); + api.actionLogSvc = actionLogSvc; + api.metadataBlockService = metadataBlockService; + api.dataverseService = dataverseService; + api.datasetFieldService = datasetFieldService; + api.controlledVocabularyValueService = controlledVocabularyValueService; + } + @Test public void testArrayIndexOutOfBoundMessageBundle() { List arguments = new ArrayList<>(); @@ -59,4 +105,41 @@ public void testGetGeneralErrorMessage() { message ); } + + @Test + public void testGetGeneralErrorMessageEmptyHeader() { + DatasetFieldServiceApi api = new DatasetFieldServiceApi(); + String message = api.getGeneralErrorMessage(null, 5, "some error"); + assertEquals( + "Error parsing metadata block in unknown part, line #5: some error", + message + ); + } + + @Test + public void testLoadDatasetFieldsWhitespaceTrimming() { + + Path resourceDirectory = Paths.get("src/test/resources/tsv/whitespace-test.tsv"); + File testfile = new File(resourceDirectory.toFile().getAbsolutePath()); + JsonReader jsonReader; + try (Response response = api.loadDatasetFields(testfile)) { + assertEquals(200, response.getStatus()); + jsonReader = Json.createReader(new StringReader(response.getEntity().toString())); + } + JsonObject jsonObject = jsonReader.readObject(); + + final List metadataNames = jsonObject.getJsonObject("data").getJsonArray("added") + .getValuesAs(e -> e.asJsonObject().getString("name")); + assertThat(metadataNames).contains("whitespaceDemo") + .contains("whitespaceDemoOne") + .contains("whitespaceDemoTwo") + .contains("whitespaceDemoThree") + .contains("CV1") + .contains("CV2") + .contains("CV3"); + assertThat(metadataNames).doesNotContain(" whitespaceDemo") + .doesNotContain("whitespaceDemoOne ") + .doesNotContain("CV1 ") + .doesNotContain(" CV2"); + } } diff --git a/src/test/resources/tsv/whitespace-test.tsv b/src/test/resources/tsv/whitespace-test.tsv new file mode 100644 index 00000000000..5485c948825 --- /dev/null +++ b/src/test/resources/tsv/whitespace-test.tsv @@ -0,0 +1,10 @@ +#metadataBlock name dataverseAlias displayName + whitespaceDemo Whitespace Demo +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id + whitespaceDemoOne One Trailing Space text 0 TRUE TRUE TRUE FALSE TRUE FALSE whitespaceDemo + whitespaceDemoTwo Two Leading Space text 1 TRUE TRUE TRUE FALSE TRUE FALSE whitespaceDemo + whitespaceDemoThree Three CV with errors text 2 TRUE TRUE TRUE FALSE TRUE FALSE whitespaceDemo +#controlledVocabulary DatasetField Value identifier displayOrder + whitespaceDemoThree CV1 0 + whitespaceDemoThree CV2 1 + whitespaceDemoThree CV3 2 From 1cbe75ff2ef326b5416be6c5ac6878fe6c6cbbd1 Mon Sep 17 00:00:00 2001 From: Eva Roddeck Date: Fri, 19 Jul 2024 11:08:56 +0200 Subject: [PATCH 2/2] added realease note #10688 --- doc/release-notes/10688_whitespace_trimming.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 doc/release-notes/10688_whitespace_trimming.md diff --git a/doc/release-notes/10688_whitespace_trimming.md b/doc/release-notes/10688_whitespace_trimming.md new file mode 100644 index 00000000000..52904c00fbf --- /dev/null +++ b/doc/release-notes/10688_whitespace_trimming.md @@ -0,0 +1,6 @@ +### Added whitespace trimming to uploaded custom metadata TSV files + +When loading custom metadata blocks using the `api/admin/datasetfield/load` API, whitespace can be introduced into field names. +This change trims whitespace at the beginning and end of all values read into the API before persisting them. + +For more information, see #10688.