From e7a8a76764a432ef9583c5b9e8dce2e3866d3785 Mon Sep 17 00:00:00 2001
From: jo-pol <jo-pol@users.noreply.github.com>
Date: Thu, 30 Nov 2023 12:03:14 +0100
Subject: [PATCH] Handle single-quoted CSV values when editing metadata

---
 src/datastation/dataverse/datasets.py       | 15 +++++++++------
 src/datastation/dv_dataset_edit_metadata.py | 16 +++++++++++-----
 2 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/src/datastation/dataverse/datasets.py b/src/datastation/dataverse/datasets.py
index c104318..e143c3d 100644
--- a/src/datastation/dataverse/datasets.py
+++ b/src/datastation/dataverse/datasets.py
@@ -11,15 +11,18 @@ def __init__(self, dataverse_client: DataverseClient, dry_run: bool = False):
         self.dry_run = dry_run
 
     def update_metadata(self, data: dict):
+        if 'rest.column' in data.keys():
+            logging.error(data)
+            raise Exception("Quoting problem or too many values.")
         logging.debug(data)
-        all_fields = []
         if '@' in ' '.join(data.keys()):
             raise Exception("Compound fields not supported")
-        for key in [key for key in data.keys() if key != 'PID' and not data[key].startswith('[')]:
-            all_fields.append({'typeName': key, 'value': data[key]})
-        for key in [key for key in data.keys() if key != 'PID' and data[key].startswith('[')]:
-            logging.debug('-------' + data[key] + '=======')
-            all_fields.append({'typeName': key, 'value': json.loads(data[key])})
+        all_fields = []
+        for key in [key for key in data.keys() if key != 'PID' and data[key] is not None]:
+            if data[key].startswith('['):
+                all_fields.append({'typeName': key, 'value': (json.loads(data[key]))})
+            else:
+                all_fields.append({'typeName': key, 'value': data[key]})
         logging.debug(all_fields)
         dataset_api = self.dataverse_client.dataset(data['PID'])
         result = dataset_api.edit_metadata(data=(json.dumps({'fields': all_fields})), dry_run=self.dry_run)
diff --git a/src/datastation/dv_dataset_edit_metadata.py b/src/datastation/dv_dataset_edit_metadata.py
index 6f38ed8..307742e 100644
--- a/src/datastation/dv_dataset_edit_metadata.py
+++ b/src/datastation/dv_dataset_edit_metadata.py
@@ -1,3 +1,4 @@
+import csv
 import re
 from argparse import ArgumentParser
 from csv import DictReader
@@ -19,12 +20,16 @@ def main():
                              "The other columns MUST have a typeName, as for the --value argument.")
     parser.add_argument('-v', '--value', action='append',
                         help="At least once in combination with a PID, not allowed in combination with CSV file. "
-                             "The new values for fields formatted as "
-                             "<typeName>=<value> for example title='New title'. "
+                             "The new values for fields formatted as <typeName>=<value>. "
+                             "for example: title='New title'. "
                              "A subfield in a compound field must be prefixed with "
-                             "the typeName of the compound field and an @ sign, e.g. "
+                             "the typeName of the compound field and an @ sign, for example: "
                              "--value author@authorName='the name' "
-                             "--value author@authorAffiliation='the organization'")
+                             "--value author@authorAffiliation='the organization'. "
+                             "The quoting style for repetitive fields is <typeName>='[\"<value>\"]', "
+                             "for example: -v dansRightsHolder='[\"me\",\"O'\"'\"'Neill\"]'. "
+                             "Note that all occurrences of a repetitive field will be replaced. "
+                             "")
     add_batch_processor_args(parser, report=False)
     add_dry_run_arg(parser)
     args = parser.parse_args()
@@ -59,7 +64,8 @@ def parse_value_args():
         if args.value is not None:
             parser.error("-v/--value arguments not allowed in combination with CSV file: " + args.pid_or_file)
         with open(args.pid_or_file, newline='') as csvfile:
-            reader = DictReader(csvfile)
+            reader = DictReader(csvfile, quotechar="'", delimiter=',', quoting=csv.QUOTE_MINIMAL,
+                                skipinitialspace=True, restkey='rest.column', escapechar=None)
             validate_fieldnames(reader.fieldnames)
             run(reader)
     else: