From 310dad183c58f7dbee0ffde5e2e5df5f1582b6e1 Mon Sep 17 00:00:00 2001 From: Jan van Mansum Date: Thu, 24 Oct 2024 16:33:01 +0200 Subject: [PATCH 1/4] Implemented delete-metadata, wip --- pom.xml | 2 +- .../nl/knaw/dans/dvcli/DdDataverseCli.java | 2 + .../dataset/DatasetDeleteMetadata.java | 67 +++++++++++++ .../dvcli/inputparsers/FieldValuesParser.java | 97 +++++++++++++++++++ 4 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java create mode 100644 src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParser.java diff --git a/pom.xml b/pom.xml index 0da60a1..f88b933 100644 --- a/pom.xml +++ b/pom.xml @@ -80,7 +80,7 @@ nl.knaw.dans dans-dataverse-client-lib - 0.33.0 + 0.33.1-SNAPSHOT org.apache.commons diff --git a/src/main/java/nl/knaw/dans/dvcli/DdDataverseCli.java b/src/main/java/nl/knaw/dans/dvcli/DdDataverseCli.java index 5204870..4ba9ab9 100644 --- a/src/main/java/nl/knaw/dans/dvcli/DdDataverseCli.java +++ b/src/main/java/nl/knaw/dans/dvcli/DdDataverseCli.java @@ -37,6 +37,7 @@ import nl.knaw.dans.dvcli.command.collection.roleassignment.CollectionRoleAssignmentRemove; import nl.knaw.dans.dvcli.command.dataset.DatasetCmd; import nl.knaw.dans.dvcli.command.dataset.DatasetDeleteDraft; +import nl.knaw.dans.dvcli.command.dataset.DatasetDeleteMetadata; import nl.knaw.dans.dvcli.command.dataset.DatasetGetFiles; import nl.knaw.dans.dvcli.command.dataset.DatasetGetLatestVersion; import nl.knaw.dans.dvcli.command.dataset.DatasetGetVersion; @@ -95,6 +96,7 @@ public void configureCommandLine(CommandLine commandLine, DdDataverseCliConfig c .addSubcommand(new DatasetGetLatestVersion()) .addSubcommand(new DatasetGetVersion()) .addSubcommand(new DatasetPublish()) + .addSubcommand(new DatasetDeleteMetadata()) .addSubcommand(new CommandLine(new DatasetRoleAssignment()) .addSubcommand(new DatasetRoleAssignmentList()) .addSubcommand(new DatasetRoleAssignmentAdd()) diff --git 
a/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java b/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java new file mode 100644 index 0000000..024f1ad --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nl.knaw.dans.dvcli.command.dataset; + +import nl.knaw.dans.dvcli.command.AbstractCmd; +import nl.knaw.dans.dvcli.inputparsers.FieldValuesParser; +import nl.knaw.dans.lib.dataverse.model.dataset.FieldList; +import picocli.CommandLine.ArgGroup; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.ParentCommand; + +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; + +@Command(name = "delete-metadata", description = """ + Delete metadata fields from a dataset. The fields to delete can be specified as command line options or in a CSV file. The dataset will be in draft state after the operation. + """) +public class DatasetDeleteMetadata extends AbstractCmd { + @ParentCommand + private DatasetCmd datasetCmd; + + static class FieldValueOrParameterFile { + @Option(names = { "-f", + "--field-value" }, description = """ + Field name and value to delete. 
If the field is a compound field, multiple field-values specified together will be treated as a single compound field. If you need to + delete multiple values from the same field, you have to call this command multiple times. + The format is: field-name=field-value. For example, to delete a field named 'alternativeTitle' with value 'Some title', use --field-value 'alternativeTitle=Some title'. + For compound fields, the field name must be prefixed with the field name of the parent field e.g., 'author.authorName' for the subfield 'authorName' of the compound field 'author'. + If the field is repeatable, you must add an asterisk (*) at the end of the field name. + """) + private List fieldValues; + + @Option(names = { "-p", "--parameters-file" }, description = """ + Path to a CSV file containing the field names and values to delete. The file must have a header row with the field names. Each subsequent row must contain the field values. + There must be a column 'PID' containing the dataset persistent identifier. The other column headers must match field names in the dataset metadata. Compound fields must be specified as + 'parentField.childField'. If you need to delete multiple fields from one dataset, use multiple rows in the CSV file. 
+ """) + private Path parametersFile; + } + + @ArgGroup(multiplicity = "1") + private FieldValueOrParameterFile fieldValueOrParameterFile; + + @Override + public void doCall() throws Exception { + var metadataFields = new FieldValuesParser(fieldValueOrParameterFile.fieldValues).parse(); + datasetCmd.batchProcessor(d -> { + d.deleteMetadata(new FieldList(metadataFields), Collections.emptyMap()); + return "Delete metadata"; + }).process(); + } +} diff --git a/src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParser.java b/src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParser.java new file mode 100644 index 0000000..6296e0d --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParser.java @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nl.knaw.dans.dvcli.inputparsers; + +import lombok.AllArgsConstructor; +import nl.knaw.dans.lib.dataverse.CompoundFieldBuilder; +import nl.knaw.dans.lib.dataverse.model.dataset.MetadataField; +import nl.knaw.dans.lib.dataverse.model.dataset.PrimitiveMultiValueField; +import nl.knaw.dans.lib.dataverse.model.dataset.PrimitiveSingleValueField; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@AllArgsConstructor +public class FieldValuesParser { + private final List values; + + public List parse() { + Map keyValues = new HashMap<>(); + + for (var value : values) { + String[] split = value.split("=", 2); + keyValues.put(checkValidName(split[0]), split[1]); + } + + Map> compoundFields = new HashMap<>(); + List keysToRemove = new ArrayList<>(); + for (var key : keyValues.keySet()) { + if (key.contains(".")) { + String[] split = key.split("\\.", 2); + String parent = split[0]; + String child = split[1]; + if (!compoundFields.containsKey(parent)) { + compoundFields.put(parent, new HashMap<>()); + } + compoundFields.get(parent).put(child, keyValues.get(key)); + keysToRemove.add(key); + } + } + for (var key : keysToRemove) { + keyValues.remove(key); + } + + List result = new ArrayList<>(); + + for (var key : keyValues.keySet()) { + if (key.endsWith("*")) { + result.add(new PrimitiveMultiValueField(key.substring(0, key.length() - 1), List.of(keyValues.get(key)))); + } + else { + result.add(new PrimitiveSingleValueField(key, keyValues.get(key))); + } + } + + for (var parent : compoundFields.keySet()) { + Map subfields = compoundFields.get(parent); + if (parent.endsWith("*")) { + var builder = new CompoundFieldBuilder(parent.substring(0, parent.length() - 1), true); + for (var subfield : subfields.keySet()) { + builder.addSubfield(subfield, subfields.get(subfield)); + } + result.add(builder.build()); + } + else { + var builder = new CompoundFieldBuilder(parent, false); + for (var subfield : 
subfields.keySet()) { + builder.addSubfield(subfield, subfields.get(subfield)); + } + result.add(builder.build()); + } + } + + return result; + } + + private String checkValidName(String name) { + if (!name.matches("[a-zA-Z0-9]+\\*?(\\.[a-zA-Z0-9]+)?")) { + throw new IllegalArgumentException("Invalid field name: " + name); + } + return name; + } +} From d4a50d64d11e3cc191f4eaecbebf42db37aec573 Mon Sep 17 00:00:00 2001 From: Jan van Mansum Date: Thu, 24 Oct 2024 16:47:28 +0200 Subject: [PATCH 2/4] Added unit test --- .../dataset/DatasetDeleteMetadata.java | 28 +++---- .../inputparsers/FieldValuesParserTest.java | 80 +++++++++++++++++++ 2 files changed, 93 insertions(+), 15 deletions(-) create mode 100644 src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParserTest.java diff --git a/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java b/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java index 024f1ad..f064414 100644 --- a/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java +++ b/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java @@ -27,29 +27,27 @@ import java.util.Collections; import java.util.List; -@Command(name = "delete-metadata", description = """ - Delete metadata fields from a dataset. The fields to delete can be specified as command line options or in a CSV file. The dataset will be in draft state after the operation. - """) +@Command(name = "delete-metadata", + mixinStandardHelpOptions = true, + description = """ + Delete metadata fields from a dataset. The fields to delete can be specified as command line options or in a CSV file. The dataset will be in draft state after the operation. + """) public class DatasetDeleteMetadata extends AbstractCmd { @ParentCommand private DatasetCmd datasetCmd; static class FieldValueOrParameterFile { @Option(names = { "-f", - "--field-value" }, description = """ - Field name and value to delete. 
If the field is a compound field, multiple field-values specified together will be treated as a single compound field. If you need to - delete multiple values from the same field, you have to call this command multiple times. - The format is: field-name=field-value. For example, to delete a field named 'alternativeTitle' with value 'Some title', use --field-value 'alternativeTitle=Some title'. - For compound fields, the field name must be prefixed with the field name of the parent field e.g., 'author.authorName' for the subfield 'authorName' of the compound field 'author'. - If the field is repeatable, you must add an asterisk (*) at the end of the field name. - """) + "--field-value" }, description = "Field name and value to delete. If the field is a compound field, multiple field-values specified together will be treated as a single compound field. " + + "If you need to delete multiple values from the same field, you have to call this command multiple times. " + + "The format is: field-name=field-value. For example, to delete a field named 'alternativeTitle' with value 'Some title', use --field-value 'alternativeTitle=Some title'. " + + "For compound fields, the field name must be prefixed with the field name of the parent field e.g., 'author.authorName' for the subfield 'authorName' of the compound field 'author'. " + + "If the field is repeatable, you must add an asterisk (*) at the end of the field name.") private List fieldValues; - @Option(names = { "-p", "--parameters-file" }, description = """ - Path to a CSV file containing the field names and values to delete. The file must have a header row with the field names. Each subsequent row must contain the field values. - There must be a column 'PID' containing the dataset persistent identifier. The other column headers must match field names in the dataset metadata. Compound fields must be specified as - 'parentField.childField'. 
If you need to delete multiple fields from one dataset, use multiple rows in the CSV file. - """) + @Option(names = { "-p", "--parameters-file" }, description = "Path to a CSV file containing the field names and values to delete. The file must have a header row with the field names. " + + "Each subsequent row must contain the field values. There must be a column 'PID' containing the dataset persistent identifier. The other column headers must match field names in " + + "the dataset metadata. Compound fields must be specified as 'parentField.childField'. If you need to delete multiple fields from one dataset, use multiple rows in the CSV file.") private Path parametersFile; } diff --git a/src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParserTest.java b/src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParserTest.java new file mode 100644 index 0000000..7892332 --- /dev/null +++ b/src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParserTest.java @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nl.knaw.dans.dvcli.inputparsers; + +import nl.knaw.dans.lib.dataverse.CompoundFieldBuilder; +import nl.knaw.dans.lib.dataverse.model.dataset.PrimitiveMultiValueField; +import nl.knaw.dans.lib.dataverse.model.dataset.PrimitiveSingleValueField; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +public class FieldValuesParserTest { + + @Test + public void parse_should_create_one_single_value_field() { + var values = List.of("field1=value1"); + assertThat(new FieldValuesParser(values).parse()) + .containsExactly(new PrimitiveSingleValueField("field1", "value1")); + + } + + @Test + public void parse_should_create_one_multivalue_field_if_name_ends_with_asterisk() { + var values = List.of("field1*=value1"); + assertThat(new FieldValuesParser(values).parse()) + .containsExactly(new PrimitiveMultiValueField("field1", List.of("value1"))); + } + + @Test + public void parse_should_create_two_single_value_fields() { + var values = List.of("field1=value1", "field2=value2"); + assertThat(new FieldValuesParser(values).parse()) + .containsExactly(new PrimitiveSingleValueField("field1", "value1"), + new PrimitiveSingleValueField("field2", "value2")); + } + + @Test + public void parse_should_create_one_multivalue_field_if_name_contains_dot() { + var values = List.of("parent.child=value1"); + assertThat(new FieldValuesParser(values).parse()) + .containsExactly(new CompoundFieldBuilder("parent", false) + .addSubfield("child", "value1") + .build()); + } + + @Test + public void parse_should_create_one_multivalue_field_if_name_contains_dot_and_parent_name_ends_with_asterisk() { + var values = List.of("parent*.child=value1"); + assertThat(new FieldValuesParser(values).parse()) + .containsExactly(new CompoundFieldBuilder("parent", true) + .addSubfield("child", "value1") + .build()); + } + + @Test + public void 
parse_should_create_one_multivalue_field_if_name_contains_dot_and_parent_name_ends_with_asterisk_and_multiple_subfields() { + var values = List.of("parent*.child1=value1", "parent*.child2=value2"); + assertThat(new FieldValuesParser(values).parse()) + .containsExactly(new CompoundFieldBuilder("parent", true) + .addSubfield("child1", "value1") + .addSubfield("child2", "value2") + .build()); + } + +} From 550fb342b0b2d484e319a8186a75a37a7dd4a3ff Mon Sep 17 00:00:00 2001 From: Jan van Mansum Date: Fri, 25 Oct 2024 10:16:46 +0200 Subject: [PATCH 3/4] Fixed unit tests --- .../dans/dvcli/action/BatchProcessor.java | 42 +++++++++++++----- .../command/dataset/DatasetValidateFiles.java | 26 +++++------ .../dans/dvcli/action/BatchProcessorTest.java | 44 +++++++++---------- .../AbstractSubcommandContainerTest.java | 11 ++--- .../command/CollectionCreateDatasetTest.java | 18 ++++---- .../command/TruncateNotificationsTest.java | 8 ++-- 6 files changed, 83 insertions(+), 66 deletions(-) diff --git a/src/main/java/nl/knaw/dans/dvcli/action/BatchProcessor.java b/src/main/java/nl/knaw/dans/dvcli/action/BatchProcessor.java index ff18506..3459ca5 100644 --- a/src/main/java/nl/knaw/dans/dvcli/action/BatchProcessor.java +++ b/src/main/java/nl/knaw/dans/dvcli/action/BatchProcessor.java @@ -19,7 +19,9 @@ import lombok.NonNull; import lombok.extern.slf4j.Slf4j; -import java.util.List; +import java.util.Collection; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; /** * Processes a batch of labeled items by applying an action to each item. The labels are used for reporting. Typically, the label is the ID of the item. After each action, the processor waits for a @@ -31,12 +33,17 @@ */ @Builder @Slf4j -public class BatchProcessor { +public class BatchProcessor { /** * The labeled items to process. */ @NonNull - private final List> labeledItems; + private final Stream> labeledItems; + + /** + * The number of items to process. 
If the labeled items are a collection, this number is the size of the collection. Otherwise, it is null. + */ + private final Long numberOfItems; /** * The action to apply to each item. @@ -56,15 +63,28 @@ public class BatchProcessor { @Builder.Default private final long delay = 1000; + public static class BatchProcessorBuilder { + public BatchProcessorBuilder labeledItems(Collection> items) { + this.labeledItems = items.stream(); + this.numberOfItems = (long) items.size(); + return this; + } + } + public void process() { - log.info("Starting batch processing"); - int i = 0; - for (var labeledItem : labeledItems) { - delayIfNeeded(i); - log.info("Processing item {} of {}", ++i, labeledItems.size()); - callAction(labeledItem.getFirst(), labeledItem.getSecond()); + log.info("Starting batch processing of " + (numberOfItems == null ? "?" : numberOfItems + " items")); + AtomicInteger i = new AtomicInteger(0); + try { + labeledItems.forEach(labeledItem -> { + int index = i.incrementAndGet(); + delayIfNeeded(index); + log.info("Processing item {} of {}: {}", index, numberOfItems == null ? "?" : numberOfItems, labeledItem.getFirst()); + callAction(labeledItem.getFirst(), labeledItem.getSecond()); + }); + } finally { + labeledItems.close(); } - log.info("Finished batch processing of {} items", labeledItems.size()); + log.info("Finished batch processing of " + (numberOfItems == null ? "?" 
: numberOfItems + " items")); } private void callAction(String label, I item) { @@ -78,7 +98,7 @@ private void callAction(String label, I item) { } private void delayIfNeeded(int i) { - if (delay > 0 && i > 0) { + if (delay > 0 && i > 1) { log.debug("Sleeping for {} ms", delay); try { Thread.sleep(delay); diff --git a/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetValidateFiles.java b/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetValidateFiles.java index 270a872..9c4a1b1 100644 --- a/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetValidateFiles.java +++ b/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetValidateFiles.java @@ -15,10 +15,10 @@ */ package nl.knaw.dans.dvcli.command.dataset; +import nl.knaw.dans.dvcli.action.BatchProcessor; import nl.knaw.dans.dvcli.action.ConsoleReport; import nl.knaw.dans.dvcli.action.Pair; import nl.knaw.dans.dvcli.action.SingleIdOrIdsFile; -import nl.knaw.dans.dvcli.action.ThrowingFunction; import nl.knaw.dans.dvcli.command.AbstractCmd; import nl.knaw.dans.lib.dataverse.AdminApi; import nl.knaw.dans.lib.dataverse.DataverseException; @@ -30,35 +30,31 @@ @Command(name = "validate-files", mixinStandardHelpOptions = true, - description = "Make sure that all files are correctly stored in object storage.") + description = "Validate the fixity checksums of the files in a dataset.") public class DatasetValidateFiles extends AbstractCmd { @ParentCommand private DatasetCmd datasetCmd; - protected List> getIds() throws IOException { + protected List> getIds() throws IOException { List pids = new SingleIdOrIdsFile(datasetCmd.getTargets(), SingleIdOrIdsFile.DEFAULT_TARGET_PLACEHOLDER).getPids().toList(); - return pids.stream().map(p -> new Pair<>(p, new IdParam(datasetCmd.getDataverseClient().admin(), p))).toList(); + // The label is the same as the id. Since the BatchProcessor expects labeled items, we create a list of pairs with the same id as label. 
+ return pids.stream().map(p -> new Pair<>(p, p)).toList(); } protected record IdParam(AdminApi admin, String id) { } - private static class ValidateFilesAction implements ThrowingFunction { - @Override - public String apply(IdParam idParam) throws IOException, DataverseException { - var r = idParam.admin().validateDatasetFiles(idParam.id); - return r.getBodyAsString(); - } - } - @Override public void doCall() throws IOException, DataverseException { - datasetCmd. paramsBatchProcessorBuilder() + // Not using the helper method on datasetCmd because we need to call the admin endpoint and not the dataset endpoint. + BatchProcessor. builder() .labeledItems(getIds()) - .action(new ValidateFilesAction()) + .action(pid -> { + var r = datasetCmd.getDataverseClient().admin().validateDatasetFiles(pid); + return r.getBodyAsString(); + }) .report(new ConsoleReport<>()) .build() .process(); } - } diff --git a/src/test/java/nl/knaw/dans/dvcli/action/BatchProcessorTest.java b/src/test/java/nl/knaw/dans/dvcli/action/BatchProcessorTest.java index babfb80..bad32a4 100644 --- a/src/test/java/nl/knaw/dans/dvcli/action/BatchProcessorTest.java +++ b/src/test/java/nl/knaw/dans/dvcli/action/BatchProcessorTest.java @@ -59,23 +59,23 @@ BatchProcessor. builder() a: OK. 
b: FAILED: Exception type = RuntimeException, message = test c: OK.""" + " "); // java text block trims trailing spaces assertThat(stdout.toString()).isEqualTo(""" - INFO Starting batch processing - INFO Processing item 1 of 3 + INFO Starting batch processing of 3 items + INFO Processing item 1 of 3: a ok DEBUG Sleeping for 1 ms - INFO Processing item 2 of 3 + INFO Processing item 2 of 3: b DEBUG Sleeping for 1 ms - INFO Processing item 3 of 3 + INFO Processing item 3 of 3: c ok INFO Finished batch processing of 3 items """); assertThat(messagesOf(logged)) - .containsExactly("INFO Starting batch processing", - "INFO Processing item 1 of 3", + .containsExactly("INFO Starting batch processing of 3 items", + "INFO Processing item 1 of 3: a", "DEBUG Sleeping for 1 ms", - "INFO Processing item 2 of 3", + "INFO Processing item 2 of 3: b", "DEBUG Sleeping for 1 ms", - "INFO Processing item 3 of 3", + "INFO Processing item 3 of 3: c", "INFO Finished batch processing of 3 items"); } @@ -95,14 +95,14 @@ BatchProcessor. builder() assertThat(stderr.toString()) .isEqualTo("a: OK. b: OK. c: OK. "); assertThat(stdout.toString()).isEqualTo(""" - INFO Starting batch processing - INFO Processing item 1 of 3 + INFO Starting batch processing of 3 items + INFO Processing item 1 of 3: a ok DEBUG Sleeping for 1000 ms - INFO Processing item 2 of 3 + INFO Processing item 2 of 3: b ok DEBUG Sleeping for 1000 ms - INFO Processing item 3 of 3 + INFO Processing item 3 of 3: c ok INFO Finished batch processing of 3 items """); @@ -124,12 +124,12 @@ BatchProcessor. builder() assertThat(stderr.toString()).isEqualTo("A: OK. B: OK. C: OK. 
"); assertThat(stdout.toString()).isEqualTo(""" - INFO Starting batch processing - INFO Processing item 1 of 3 + INFO Starting batch processing of 3 items + INFO Processing item 1 of 3: A ok - INFO Processing item 2 of 3 + INFO Processing item 2 of 3: B ok - INFO Processing item 3 of 3 + INFO Processing item 3 of 3: C ok INFO Finished batch processing of 3 items """); @@ -150,12 +150,12 @@ BatchProcessor. builder() assertThat(stderr.toString()).isEqualTo("X: OK. Y: OK. Z: OK. "); assertThat(stdout.toString()).isEqualTo(""" - INFO Starting batch processing - INFO Processing item 1 of 3 + INFO Starting batch processing of 3 items + INFO Processing item 1 of 3: X ok - INFO Processing item 2 of 3 + INFO Processing item 2 of 3: Y ok - INFO Processing item 3 of 3 + INFO Processing item 3 of 3: Z ok INFO Finished batch processing of 3 items """); @@ -172,11 +172,11 @@ BatchProcessor. builder() assertThat(stderr.toString()).isEqualTo(""); assertThat(stdout.toString()).isEqualTo(""" - INFO Starting batch processing + INFO Starting batch processing of 0 items INFO Finished batch processing of 0 items """); assertThat(messagesOf(logged)).containsExactly( - "INFO Starting batch processing", + "INFO Starting batch processing of 0 items", "INFO Finished batch processing of 0 items"); } diff --git a/src/test/java/nl/knaw/dans/dvcli/command/AbstractSubcommandContainerTest.java b/src/test/java/nl/knaw/dans/dvcli/command/AbstractSubcommandContainerTest.java index a7879ed..b0d2749 100644 --- a/src/test/java/nl/knaw/dans/dvcli/command/AbstractSubcommandContainerTest.java +++ b/src/test/java/nl/knaw/dans/dvcli/command/AbstractSubcommandContainerTest.java @@ -15,6 +15,7 @@ */ package nl.knaw.dans.dvcli.command; +import lombok.extern.slf4j.Slf4j; import nl.knaw.dans.dvcli.AbstractCapturingTest; import nl.knaw.dans.dvcli.action.Pair; import nl.knaw.dans.dvcli.command.collection.CollectionCmd; @@ -32,8 +33,8 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; import static 
org.junit.jupiter.api.Assertions.assertDoesNotThrow; +@Slf4j public class AbstractSubcommandContainerTest extends AbstractCapturingTest { - private static final Logger log = LoggerFactory.getLogger(AbstractSubcommandContainerTest.class); private static class TestCmd extends AbstractSubcommandContainer { @@ -265,14 +266,14 @@ public void batchProcessor_does_not_throw() { assertThat(stderr.toString()).isEqualTo("1: OK. "); assertThat(stdout.toString()).isEqualTo(""" - INFO Starting batch processing - INFO Processing item 1 of 1 + INFO Starting batch processing of 1 items + INFO Processing item 1 of 1: 1 value of 1 INFO Finished batch processing of 1 items """); assertThat(logged.list.stream().map(Object::toString).toList()).containsExactly( - "[INFO] Starting batch processing", - "[INFO] Processing item 1 of 1", + "[INFO] Starting batch processing of 1 items", + "[INFO] Processing item 1 of 1: 1", "[INFO] Finished batch processing of 1 items" ); } diff --git a/src/test/java/nl/knaw/dans/dvcli/command/CollectionCreateDatasetTest.java b/src/test/java/nl/knaw/dans/dvcli/command/CollectionCreateDatasetTest.java index 92a0f14..a844f96 100644 --- a/src/test/java/nl/knaw/dans/dvcli/command/CollectionCreateDatasetTest.java +++ b/src/test/java/nl/knaw/dans/dvcli/command/CollectionCreateDatasetTest.java @@ -64,12 +64,12 @@ public void doCall_continues_on_unknownHost() throws Exception { cmd.doCall(); assertThat(stdout.toString()).isEqualTo(""" - INFO Starting batch processing - INFO Processing item 1 of 3 + INFO Starting batch processing of 3 items + INFO Processing item 1 of 3: A DEBUG buildUri: https://does.not.exist.dans.knaw.nl/api/dataverses/A/datasets - INFO Processing item 2 of 3 + INFO Processing item 2 of 3: B DEBUG buildUri: https://does.not.exist.dans.knaw.nl/api/dataverses/B/datasets - INFO Processing item 3 of 3 + INFO Processing item 3 of 3: C DEBUG buildUri: https://does.not.exist.dans.knaw.nl/api/dataverses/C/datasets INFO Finished batch processing of 3 items 
"""); @@ -103,8 +103,8 @@ public void doCall_is_happy() throws Exception { assertThat(stderr.toString()).isEqualTo("A: OK. "); assertThat(stdout.toString()).isEqualTo(""" - INFO Starting batch processing - INFO Processing item 1 of 1 + INFO Starting batch processing of 1 items + INFO Processing item 1 of 1: A mock response INFO Finished batch processing of 1 items """); @@ -138,9 +138,9 @@ public void doCall_with_dir_as_json_file_fails() throws Exception { B: FAILED: Exception type = IOException, message = Is a directory """); assertThat(stdout.toString()).isEqualTo(""" - INFO Starting batch processing - INFO Processing item 1 of 2 - INFO Processing item 2 of 2 + INFO Starting batch processing of 2 items + INFO Processing item 1 of 2: A + INFO Processing item 2 of 2: B INFO Finished batch processing of 2 items """); } diff --git a/src/test/java/nl/knaw/dans/dvcli/command/TruncateNotificationsTest.java b/src/test/java/nl/knaw/dans/dvcli/command/TruncateNotificationsTest.java index 6629907..43af884 100644 --- a/src/test/java/nl/knaw/dans/dvcli/command/TruncateNotificationsTest.java +++ b/src/test/java/nl/knaw/dans/dvcli/command/TruncateNotificationsTest.java @@ -91,16 +91,16 @@ public void doCall_with_several_users_to_truncate_notifications_works() throws E assertThat(stderr.toString()).isEqualTo("1: OK. 2: OK. 3: OK. 
"); assertThat(stdout.toString()).isEqualTo(""" - INFO Starting batch processing - INFO Processing item 1 of 3 + INFO Starting batch processing of 3 items + INFO Processing item 1 of 3: 1 INFO Deleting notifications for user with id 1 Deleted 3 record(s) for user with id 1 DEBUG Sleeping for 10 ms - INFO Processing item 2 of 3 + INFO Processing item 2 of 3: 2 INFO Deleting notifications for user with id 2 Deleted 2 record(s) for user with id 2 DEBUG Sleeping for 10 ms - INFO Processing item 3 of 3 + INFO Processing item 3 of 3: 3 INFO Deleting notifications for user with id 3 Deleted 1 record(s) for user with id 3 INFO Finished batch processing of 3 items From acaf7550d44ab9c4f3d01dfa02a443082cc91683 Mon Sep 17 00:00:00 2001 From: Jan van Mansum Date: Fri, 25 Oct 2024 19:20:21 +0200 Subject: [PATCH 4/4] Finished implementation --- .../dans/dvcli/action/BatchProcessor.java | 7 +- .../command/AbstractSubcommandContainer.java | 4 + .../dataset/DatasetDeleteMetadata.java | 59 ++++++++++- .../dans/dvcli/inputparsers/CsvStream.java | 45 +++++++++ .../FieldValuesParamsFileParser.java | 92 +++++++++++++++++ .../dvcli/inputparsers/FieldValuesParser.java | 51 ++++++---- .../FieldValuesParamsFileParserTest.java | 99 +++++++++++++++++++ .../inputparsers/FieldValuesParserTest.java | 40 +++++--- 8 files changed, 357 insertions(+), 40 deletions(-) create mode 100644 src/main/java/nl/knaw/dans/dvcli/inputparsers/CsvStream.java create mode 100644 src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParamsFileParser.java create mode 100644 src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParamsFileParserTest.java diff --git a/src/main/java/nl/knaw/dans/dvcli/action/BatchProcessor.java b/src/main/java/nl/knaw/dans/dvcli/action/BatchProcessor.java index 3459ca5..6ca6f9a 100644 --- a/src/main/java/nl/knaw/dans/dvcli/action/BatchProcessor.java +++ b/src/main/java/nl/knaw/dans/dvcli/action/BatchProcessor.java @@ -35,7 +35,7 @@ @Slf4j public class BatchProcessor { /** - * The 
labeled items to process. + * The labeled items to process. The String is the label, I is the item. */ @NonNull private final Stream> labeledItems; @@ -69,6 +69,11 @@ public BatchProcessorBuilder labeledItems(Collection> item this.numberOfItems = (long) items.size(); return this; } + + public BatchProcessorBuilder labeledItems(Stream> items) { + this.labeledItems = items; + return this; + } } public void process() { diff --git a/src/main/java/nl/knaw/dans/dvcli/command/AbstractSubcommandContainer.java b/src/main/java/nl/knaw/dans/dvcli/command/AbstractSubcommandContainer.java index 9a06690..4d369ec 100644 --- a/src/main/java/nl/knaw/dans/dvcli/command/AbstractSubcommandContainer.java +++ b/src/main/java/nl/knaw/dans/dvcli/command/AbstractSubcommandContainer.java @@ -29,6 +29,10 @@ import java.io.IOException; import java.util.List; +/** + * + * @param + */ public abstract class AbstractSubcommandContainer extends AbstractCmd { private static final long DEFAULT_DELAY = 1000; diff --git a/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java b/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java index f064414..2855b4a 100644 --- a/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java +++ b/src/main/java/nl/knaw/dans/dvcli/command/dataset/DatasetDeleteMetadata.java @@ -15,9 +15,15 @@ */ package nl.knaw.dans.dvcli.command.dataset; +import lombok.Value; +import nl.knaw.dans.dvcli.action.Pair; +import nl.knaw.dans.dvcli.action.ThrowingFunction; import nl.knaw.dans.dvcli.command.AbstractCmd; +import nl.knaw.dans.dvcli.inputparsers.FieldValuesParamsFileParser; import nl.knaw.dans.dvcli.inputparsers.FieldValuesParser; +import nl.knaw.dans.lib.dataverse.DatasetApi; import nl.knaw.dans.lib.dataverse.model.dataset.FieldList; +import nl.knaw.dans.lib.dataverse.model.dataset.MetadataField; import picocli.CommandLine.ArgGroup; import picocli.CommandLine.Command; import picocli.CommandLine.Option; @@ -25,7 +31,10 @@ 
import java.nio.file.Path; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Set; +import java.util.stream.Stream; @Command(name = "delete-metadata", mixinStandardHelpOptions = true, @@ -54,12 +63,52 @@ static class FieldValueOrParameterFile { @ArgGroup(multiplicity = "1") private FieldValueOrParameterFile fieldValueOrParameterFile; + private static class DeleteMetadataAction implements ThrowingFunction { + @Override + public String apply(DeleteMetadataParams deleteMetadataParams) throws Exception { + var fieldList = new FieldList(deleteMetadataParams.fieldValues.stream().toList()); + deleteMetadataParams.api.deleteMetadata(fieldList, Collections.emptyMap()); + return "Delete metadata"; + } + } + + @Value + private static class DeleteMetadataParams { + DatasetApi api; + Set fieldValues; + } + @Override public void doCall() throws Exception { - var metadataFields = new FieldValuesParser(fieldValueOrParameterFile.fieldValues).parse(); - datasetCmd.batchProcessor(d -> { - d.deleteMetadata(new FieldList(metadataFields), Collections.emptyMap()); - return "Delete metadata"; - }).process(); + datasetCmd. 
paramsBatchProcessorBuilder() + .labeledItems(getLabeledItems()) + .action(new DeleteMetadataAction()) + .build() + .process(); + } + + private Stream> getLabeledItems() { + try { + if (fieldValueOrParameterFile.fieldValues != null) { + var keyValues = new HashMap(); + for (var fieldValue : fieldValueOrParameterFile.fieldValues) { + var split = fieldValue.split("=", 2); + keyValues.put(split[0], split[1]); + } + return datasetCmd.getItems().stream() + .map(p -> new Pair<>(p.getFirst(), new FieldValuesParser(keyValues).parse())) + .map(p -> new Pair<>(p.getFirst(), new DeleteMetadataParams(datasetCmd.getDataverseClient().dataset(p.getFirst()), p.getSecond()))); + + } + else if (fieldValueOrParameterFile.parametersFile != null) { + return new FieldValuesParamsFileParser(fieldValueOrParameterFile.parametersFile) + .parse() + .map(p -> new Pair<>(p.getFirst(), new DeleteMetadataParams(datasetCmd.getDataverseClient().dataset(p.getFirst()), p.getSecond()))); + } + } + catch (Exception e) { + throw new RuntimeException("Error parsing field values or parameter file.", e); + } + throw new IllegalArgumentException("No field values or parameter file specified."); } } diff --git a/src/main/java/nl/knaw/dans/dvcli/inputparsers/CsvStream.java b/src/main/java/nl/knaw/dans/dvcli/inputparsers/CsvStream.java new file mode 100644 index 0000000..bcb653d --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvcli/inputparsers/CsvStream.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nl.knaw.dans.dvcli.inputparsers; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +public class CsvStream { + private final Path csvFile; + + public CsvStream(Path csvFile) { + this.csvFile = csvFile; + } + + public Stream stream() throws IOException { + CSVParser parser = CSVParser.parse(csvFile, StandardCharsets.UTF_8, CSVFormat.DEFAULT.builder().setSkipHeaderRecord(true).build()); + return StreamSupport.stream(parser.spliterator(), false).onClose(() -> { + try { + parser.close(); + } catch (IOException e) { + e.printStackTrace(); + } + }); + } +} \ No newline at end of file diff --git a/src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParamsFileParser.java b/src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParamsFileParser.java new file mode 100644 index 0000000..5fae1db --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParamsFileParser.java @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nl.knaw.dans.dvcli.inputparsers; + +import lombok.AllArgsConstructor; +import nl.knaw.dans.dvcli.action.Pair; +import nl.knaw.dans.lib.dataverse.model.dataset.MetadataField; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +@AllArgsConstructor +public class FieldValuesParamsFileParser { + private final Path csvFile; + + /** + * Parse the parameters file and return a stream of pairs of PID and a list of MetadataFields. The client is expected to process the list of MetadataFields for the given PID. Note that the + * MetadataField class actually represents a metadata field value, not merely field definition (although it does contain the field definition, such as repeatability). + * + * The parameters file must have the following format: + * + *
+     * PID,field1,parentField1*.subfieldA,parentField1*.subfieldB &lt;-- the header
+     * doi:10.5072/dans-2a3-4b5,foo,bar,baz &lt;-- a row
+     * doi:10.5072/dans-2a3-4b5,foo,bar,baz &lt;-- another row
+     * 
+ * + * The asterisk (*) is used to indicate that the field is multi-value, i.e. repeatable. + * + * @return a stream of pairs of PID and a list of MetadataFields + */ + public Stream>> parse() { + try { + CSVParser parser = CSVParser.parse(csvFile, StandardCharsets.UTF_8, + CSVFormat.DEFAULT.builder() + .setHeader() + .setSkipHeaderRecord(true).build()); + return StreamSupport.stream(parser.spliterator(), false).onClose(() -> { + try { + parser.close(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + }).map(record -> parseRecord(record, new HashSet<>(parser.getHeaderMap().keySet()))); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private Pair> parseRecord(CSVRecord record, Set headers) { + String pid = record.get("PID"); + if (pid == null || pid.isBlank()) { + throw new IllegalArgumentException("PID is missing in the parameters file"); + } + + Map keyValues = new HashMap<>(); + for (String header : headers) { + if (header.equals("PID")) { + continue; + } + keyValues.put(header, record.get(header)); + } + + return new Pair<>(pid, new FieldValuesParser(keyValues).parse()); + } +} diff --git a/src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParser.java b/src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParser.java index 6296e0d..7dff77e 100644 --- a/src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParser.java +++ b/src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParser.java @@ -20,22 +20,22 @@ import nl.knaw.dans.lib.dataverse.model.dataset.MetadataField; import nl.knaw.dans.lib.dataverse.model.dataset.PrimitiveMultiValueField; import nl.knaw.dans.lib.dataverse.model.dataset.PrimitiveSingleValueField; +import org.apache.commons.lang3.StringUtils; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; @AllArgsConstructor public class FieldValuesParser { - private final List values; + private 
final Map keyValues; - public List parse() { - Map keyValues = new HashMap<>(); - - for (var value : values) { - String[] split = value.split("=", 2); - keyValues.put(checkValidName(split[0]), split[1]); + public Set parse() { + for (var key : keyValues.keySet()) { + checkValidName(key); } Map> compoundFields = new HashMap<>(); @@ -56,14 +56,16 @@ public List parse() { keyValues.remove(key); } - List result = new ArrayList<>(); + Set result = new HashSet<>(); for (var key : keyValues.keySet()) { - if (key.endsWith("*")) { - result.add(new PrimitiveMultiValueField(key.substring(0, key.length() - 1), List.of(keyValues.get(key)))); - } - else { - result.add(new PrimitiveSingleValueField(key, keyValues.get(key))); + if (StringUtils.isNotBlank(keyValues.get(key))) { + if (key.endsWith("*")) { + result.add(new PrimitiveMultiValueField(key.substring(0, key.length() - 1), List.of(keyValues.get(key)))); + } + else { + result.add(new PrimitiveSingleValueField(key, keyValues.get(key))); + } } } @@ -71,27 +73,38 @@ public List parse() { Map subfields = compoundFields.get(parent); if (parent.endsWith("*")) { var builder = new CompoundFieldBuilder(parent.substring(0, parent.length() - 1), true); + boolean hasValues = false; for (var subfield : subfields.keySet()) { - builder.addSubfield(subfield, subfields.get(subfield)); + if (StringUtils.isNotBlank(subfields.get(subfield))) { + builder.addSubfield(subfield, subfields.get(subfield)); + hasValues = true; + } + } + if (hasValues) { + result.add(builder.build()); } - result.add(builder.build()); } else { var builder = new CompoundFieldBuilder(parent, false); + boolean hasValues = false; for (var subfield : subfields.keySet()) { - builder.addSubfield(subfield, subfields.get(subfield)); + if (StringUtils.isNotBlank(subfields.get(subfield))) { + builder.addSubfield(subfield, subfields.get(subfield)); + hasValues = true; + } + } + if (hasValues) { + result.add(builder.build()); } - result.add(builder.build()); } } return result; } - 
private String checkValidName(String name) { + private void checkValidName(String name) { if (!name.matches("[a-zA-Z0-9]+\\*?(\\.[a-zA-Z0-9]+)?")) { throw new IllegalArgumentException("Invalid field name: " + name); } - return name; } } diff --git a/src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParamsFileParserTest.java b/src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParamsFileParserTest.java new file mode 100644 index 0000000..5ae6fa3 --- /dev/null +++ b/src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParamsFileParserTest.java @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nl.knaw.dans.dvcli.inputparsers; + +import nl.knaw.dans.dvcli.AbstractTestWithTestDir; +import nl.knaw.dans.dvcli.action.Pair; +import nl.knaw.dans.lib.dataverse.CompoundFieldBuilder; +import nl.knaw.dans.lib.dataverse.model.dataset.PrimitiveMultiValueField; +import nl.knaw.dans.lib.dataverse.model.dataset.PrimitiveSingleValueField; +import org.apache.commons.io.FileUtils; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +public class FieldValuesParamsFileParserTest extends AbstractTestWithTestDir { + + @Test + public void parse_should_parse_csv_file_with_pid_and_one_field() throws Exception { + FileUtils.writeStringToFile(testDir.resolve("params.csv").toFile(), """ + PID,field1 + doi:10.5072/dans-2a3-4b5,foo + """, StandardCharsets.UTF_8); + + var result = new FieldValuesParamsFileParser(testDir.resolve("params.csv")).parse(); + assertThat(result).containsExactly( + new Pair<>("doi:10.5072/dans-2a3-4b5", Set.of(new PrimitiveSingleValueField("field1", "foo"))) + ); + } + + @Test + public void parse_should_parse_csv_file_with_pid_and_two_fields() throws Exception { + FileUtils.writeStringToFile(testDir.resolve("params.csv").toFile(), """ + PID,field1,field2 + doi:10.5072/dans-2a3-4b5,foo,bar + """, StandardCharsets.UTF_8); + + var result = new FieldValuesParamsFileParser(testDir.resolve("params.csv")).parse(); + assertThat(result).containsExactly( + new Pair<>("doi:10.5072/dans-2a3-4b5", Set.of( + new PrimitiveSingleValueField("field1", "foo"), + new PrimitiveSingleValueField("field2", "bar") + )) + ); + } + + @Test + public void parse_should_parse_csv_file_with_pid_and_two_fields_and_one_repeated_field() throws Exception { + FileUtils.writeStringToFile(testDir.resolve("params.csv").toFile(), """ + PID,field1,field2,field3* + doi:10.5072/dans-2a3-4b5,foo,bar,baz + """, StandardCharsets.UTF_8); + + var result = 
new FieldValuesParamsFileParser(testDir.resolve("params.csv")).parse(); + assertThat(result).containsExactly( + new Pair<>("doi:10.5072/dans-2a3-4b5", Set.of( + new PrimitiveSingleValueField("field1", "foo"), + new PrimitiveSingleValueField("field2", "bar"), + new PrimitiveMultiValueField("field3", List.of("baz")) + )) + ); + } + + @Test + public void parse_should_parse_csv_file_with_pid_and_compound_field() throws Exception { + FileUtils.writeStringToFile(testDir.resolve("params.csv").toFile(), """ + PID,field1,parentField1*.subfieldA,parentField1*.subfieldB + doi:10.5072/dans-2a3-4b5,foo,bar,baz + """, StandardCharsets.UTF_8); + + var result = new FieldValuesParamsFileParser(testDir.resolve("params.csv")).parse(); + assertThat(result).containsExactly( + new Pair<>("doi:10.5072/dans-2a3-4b5", Set.of( + new PrimitiveSingleValueField("field1", "foo"), + new CompoundFieldBuilder("parentField1", true) + .addSubfield("subfieldA", "bar") + .addSubfield("subfieldB", "baz") + .build() + )) + ); + } + +} diff --git a/src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParserTest.java b/src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParserTest.java index 7892332..9757b61 100644 --- a/src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParserTest.java +++ b/src/test/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParserTest.java @@ -20,39 +20,48 @@ import nl.knaw.dans.lib.dataverse.model.dataset.PrimitiveSingleValueField; import org.junit.jupiter.api.Test; +import java.util.HashMap; import java.util.List; +import java.util.Map; import static org.assertj.core.api.Assertions.assertThat; public class FieldValuesParserTest { + // N.B. Map.of returns an immutable map, which is not what we want here + private Map map(String... 
values) { + if (values.length % 2 != 0) { + throw new IllegalArgumentException("Even number of arguments required"); + } + var map = new HashMap(); + for (int i = 0; i < values.length; i += 2) { + map.put(values[i], values[i + 1]); + } + return map; + } + @Test public void parse_should_create_one_single_value_field() { - var values = List.of("field1=value1"); - assertThat(new FieldValuesParser(values).parse()) + assertThat(new FieldValuesParser(map("field1", "value1")).parse()) .containsExactly(new PrimitiveSingleValueField("field1", "value1")); - } @Test public void parse_should_create_one_multivalue_field_if_name_ends_with_asterisk() { - var values = List.of("field1*=value1"); - assertThat(new FieldValuesParser(values).parse()) + assertThat(new FieldValuesParser(map("field1*", "value1")).parse()) .containsExactly(new PrimitiveMultiValueField("field1", List.of("value1"))); } @Test public void parse_should_create_two_single_value_fields() { - var values = List.of("field1=value1", "field2=value2"); - assertThat(new FieldValuesParser(values).parse()) - .containsExactly(new PrimitiveSingleValueField("field1", "value1"), - new PrimitiveSingleValueField("field2", "value2")); + assertThat(new FieldValuesParser(map("field1", "value1", "field2", "value2")).parse()) + .containsExactlyInAnyOrder(new PrimitiveSingleValueField("field1", "value1"), + new PrimitiveSingleValueField("field2", "value2")); } @Test public void parse_should_create_one_multivalue_field_if_name_contains_dot() { - var values = List.of("parent.child=value1"); - assertThat(new FieldValuesParser(values).parse()) + assertThat(new FieldValuesParser(map("parent.child", "value1")).parse()) .containsExactly(new CompoundFieldBuilder("parent", false) .addSubfield("child", "value1") .build()); @@ -60,8 +69,7 @@ public void parse_should_create_one_multivalue_field_if_name_contains_dot() { @Test public void parse_should_create_one_multivalue_field_if_name_contains_dot_and_parent_name_ends_with_asterisk() { - var 
values = List.of("parent*.child=value1"); - assertThat(new FieldValuesParser(values).parse()) + assertThat(new FieldValuesParser(map("parent*.child", "value1")).parse()) .containsExactly(new CompoundFieldBuilder("parent", true) .addSubfield("child", "value1") .build()); @@ -69,8 +77,9 @@ public void parse_should_create_one_multivalue_field_if_name_contains_dot_and_pa @Test public void parse_should_create_one_multivalue_field_if_name_contains_dot_and_parent_name_ends_with_asterisk_and_multiple_subfields() { - var values = List.of("parent*.child1=value1", "parent*.child2=value2"); - assertThat(new FieldValuesParser(values).parse()) + assertThat(new FieldValuesParser(map( + "parent*.child1", "value1", + "parent*.child2", "value2")).parse()) .containsExactly(new CompoundFieldBuilder("parent", true) .addSubfield("child1", "value1") .addSubfield("child2", "value2") @@ -78,3 +87,4 @@ public void parse_should_create_one_multivalue_field_if_name_contains_dot_and_pa } } +