From e7a8a76764a432ef9583c5b9e8dce2e3866d3785 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Thu, 30 Nov 2023 12:03:14 +0100 Subject: [PATCH] quotes in CSV --- src/datastation/dataverse/datasets.py | 15 +++++++++------ src/datastation/dv_dataset_edit_metadata.py | 16 +++++++++++----- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/datastation/dataverse/datasets.py b/src/datastation/dataverse/datasets.py index c104318..e143c3d 100644 --- a/src/datastation/dataverse/datasets.py +++ b/src/datastation/dataverse/datasets.py @@ -11,15 +11,18 @@ def __init__(self, dataverse_client: DataverseClient, dry_run: bool = False): self.dry_run = dry_run def update_metadata(self, data: dict): + if 'rest.column' in data.keys(): + logging.error(data) + raise Exception("Quoting problem or too many values.") logging.debug(data) - all_fields = [] if '@' in ' '.join(data.keys()): raise Exception("Compound fields not supported") - for key in [key for key in data.keys() if key != 'PID' and not data[key].startswith('[')]: - all_fields.append({'typeName': key, 'value': data[key]}) - for key in [key for key in data.keys() if key != 'PID' and data[key].startswith('[')]: - logging.debug('-------' + data[key] + '=======') - all_fields.append({'typeName': key, 'value': json.loads(data[key])}) + all_fields = [] + for key in [key for key in data.keys() if key != 'PID' and data[key] is not None]: + if data[key].startswith('['): + all_fields.append({'typeName': key, 'value': (json.loads(data[key]))}) + else: + all_fields.append({'typeName': key, 'value': data[key]}) logging.debug(all_fields) dataset_api = self.dataverse_client.dataset(data['PID']) result = dataset_api.edit_metadata(data=(json.dumps({'fields': all_fields})), dry_run=self.dry_run) diff --git a/src/datastation/dv_dataset_edit_metadata.py b/src/datastation/dv_dataset_edit_metadata.py index 6f38ed8..307742e 100644 --- a/src/datastation/dv_dataset_edit_metadata.py +++ b/src/datastation/dv_dataset_edit_metadata.py @@ -1,3 
+1,4 @@ +import csv import re from argparse import ArgumentParser from csv import DictReader @@ -19,12 +20,16 @@ def main(): "The other columns MUST have a typeName, as for the --value argument.") parser.add_argument('-v', '--value', action='append', help="At least once in combination with a PID, not allowed in combination with CSV file. " - "The new values for fields formatted as " - "<typeName>=<value> for example title='New title'. " + "The new values for fields formatted as <typeName>=<value>. " + "for example: title='New title'. " "A subfield in a compound field must be prefixed with " - "the typeName of the compound field and an @ sign, e.g. " + "the typeName of the compound field and an @ sign, for example: " "--value author@authorName='the name' " - "--value author@authorAffiliation='the organization'") + "--value author@authorAffiliation='the organization'. " + "The quoting style for repetitive fields is <typeName>='[\"<value>\"]', " + "for example: -v dansRightsHolder='[\"me\",\"O'\"'\"'Neill\"]'. " + "Note that all occurrences of a repetitive field will be replaced. " + "") add_batch_processor_args(parser, report=False) add_dry_run_arg(parser) args = parser.parse_args() @@ -59,7 +64,8 @@ def parse_value_args(): if args.value is not None: parser.error("-v/--value arguments not allowed in combination with CSV file: " + args.pid_or_file) with open(args.pid_or_file, newline='') as csvfile: - reader = DictReader(csvfile) + reader = DictReader(csvfile, quotechar="'", delimiter=',', quoting=csv.QUOTE_MINIMAL, + skipinitialspace=True, restkey='rest.column', escapechar=None) validate_fieldnames(reader.fieldnames) run(reader) else: