subfield and compound field only separated with index
jo-pol committed Jan 11, 2024
1 parent e846bd9 commit 0699232
Showing 3 changed files with 41 additions and 19 deletions.
12 changes: 6 additions & 6 deletions src/datastation/dataverse/datasets.py
@@ -19,19 +19,19 @@ def update_metadata(self, data: dict, replace: bool = False):
         compound_fields = {} # assumed to be repetitive
         simple_fields = {}
         for type_name in type_names:
-            match = re.match('([-a-z]+)(\\[([0-9]+)])?(@([-a-z]+))?$', type_name, re.IGNORECASE)
+            match = re.match('([-a-z]+)((\\[([0-9]+)])|@)?([-a-z]+)?$', type_name, re.IGNORECASE)
             if match is None:
-                raise Exception(f"Invalid typeName {type_name}={data[type_name]}")
+                raise Exception(f"Invalid typeName [{type_name}] : {data}")
+            if '@' in type_name:
+                raise Exception(f"Single compound fields [{type_name}] are not supported : {data}")
             if '[' not in type_name:
                 simple_fields[type_name] = data[type_name]
-                if '@' in type_name:
-                    raise Exception(f"Single compound fields are not supported: {type_name}={data[type_name]}")
                 if not replace:
-                    raise Exception(f"Single value fields must be replaced: {type_name}={data[type_name]}")
+                    raise Exception(f"Single value fields [{type_name}] must be replaced : {data}")
             else:
                 parent = match.group(1)
                 child = match.group(5)
-                index = int(match.group(3))
+                index = int(match.group(4))
                 if child is None:
                     if parent not in simple_fields.keys():
                         simple_fields[parent] = []
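Aside (not part of the commit): a minimal, self-contained sketch of what the new pattern yields for the typeName shapes exercised in the tests. The group numbers are the ones update_metadata reads: parent = group 1, index = group 4, subfield = group 5.

import re

PATTERN = '([-a-z]+)((\\[([0-9]+)])|@)?([-a-z]+)?$'  # the new pattern from this commit

m = re.match(PATTERN, 'author[0]authorName', re.IGNORECASE)
print(m.group(1), m.group(4), m.group(5))  # author 0 authorName -> compound parent, index, subfield

m = re.match(PATTERN, 'title', re.IGNORECASE)
print(m.group(1), m.group(5))              # title None -> simple field, no subfield

m = re.match(PATTERN, 'socialScienceNotes@socialScienceNotesType', re.IGNORECASE)
print(m.group(2))                          # @ -> later rejected as an unsupported single compound field

print(re.match(PATTERN, 'rest.column', re.IGNORECASE))           # None -> "Invalid typeName"
print(re.match(PATTERN, 'author[0]@authorName', re.IGNORECASE))  # None -> the old index-plus-@ syntax no longer parses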
8 changes: 4 additions & 4 deletions src/datastation/dv_dataset_edit_metadata.py
@@ -27,10 +27,10 @@ def main():
                         help="At least once in combination with a PID, none in combination with a CSV file. "
                              "The new values for fields must be formatted as <typeName>=<value>. "
                              "For example: title='New title'. "
-                             "A subfield in a compound field must be prefixed with "
-                             "the typeName of the compound field and an @ sign, for example: "
-                             "--value author[0]@authorName='the name' "
-                             "--value author[0]@authorAffiliation='the organization'. "
+                             "A subfield in a compound field must be prefixed with the typeName of the compound field "
+                             "and an index (single compound fields are not implemented), for example: "
+                             "--value author[0]authorName='the name' "
+                             "--value author[0]authorAffiliation='the organization'. "
                              "Only repetitive compound fields are supported. "
                              "An attempt to update a protected field will result in '403 Client Error: Forbidden'. "
                              "You may also get a 403 when updating author details without updating the authorName. "
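Illustration only (an assumption, not shown in this diff): after argument parsing, the two --value examples above presumably arrive in Datasets.update_metadata as a plain dict keyed by the index-separated typeNames, which is the same shape the updated tests below pass in.

# hypothetical dict corresponding to --value author[0]authorName='the name'
#                                and --value author[0]authorAffiliation='the organization'
data = {
    'PID': 'doi:10.5072/FK2/8KQW3Y',                   # example PID borrowed from the tests
    'author[0]authorName': 'the name',                 # compound field 'author', row 0, subfield 'authorName'
    'author[0]authorAffiliation': 'the organization',  # same row, second subfield
}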
40 changes: 31 additions & 9 deletions src/tests/test_datasets.py
@@ -26,6 +26,18 @@ def test_update_metadata(self, caplog, capsys):
         assert caplog.records[0].funcName == 'update_metadata'
         assert caplog.records[0].levelname == 'INFO'

+    def test_update_single_value_without_replace(self, caplog, capsys):
+        caplog.set_level('INFO')
+        client = DataverseClient(config=self.cfg)
+        datasets = Datasets(client, dry_run=True)
+        data = {'PID': 'doi:10.5072/FK2/8KQW3Y', 'title': 'New title'}
+        with pytest.raises(Exception) as e:
+            datasets.update_metadata(data, replace=False)
+        assert str(e.value) == ("Single value fields [title] must be replaced : "
+                                "{'PID': 'doi:10.5072/FK2/8KQW3Y', 'title': 'New title'}")
+        assert capsys.readouterr().out == ''
+        assert len(caplog.records) == 0
+
     def test_update_metadata_with_repetitive_field_without_replacing(self, caplog, capsys):
         caplog.set_level('INFO')
         client = DataverseClient(config=self.cfg)
@@ -48,10 +60,10 @@ def test_update_metadata_with_repetitive_compound_field(self, caplog, capsys):
         client = DataverseClient(config=self.cfg)
         datasets = Datasets(client, dry_run=True)
         data = {'PID': 'doi:10.5072/FK2/8KQW3Y',
-                'author[0]@authorName': 'me',
-                'author[0]@authorAffiliation': 'mine',
-                'author[1]@authorName': 'you',
-                'author[1]@authorAffiliation': 'yours'}
+                'author[0]authorName': 'me',
+                'author[0]authorAffiliation': 'mine',
+                'author[1]authorName': 'you',
+                'author[1]authorAffiliation': 'yours'}

         datasets.update_metadata(data)

@@ -80,16 +92,23 @@ def test_update_metadata_with_single_compound_field(self, caplog, capsys):
                 'socialScienceNotes@socialScienceNotesSubject': 'q',
                 'socialScienceNotes@socialScienceNotesText': 'r'}
         # {"fields": [{"typeName": "socialScienceNotes", "value": {...}}]}
-        # would cause a bad request with "Semantic error parsing dataset update Json: Empty value for field: Notes"
-        # in the server log
+        # would cause a bad request
+        # with "Semantic error parsing dataset update Json: Empty value for field: Notes" in the server log
         # {"fields": [{"typeName": "socialScienceNotes", "value": [{...}]}]}
         # would cause an internal server error with an exception thrown by JsonParser.parseCompoundValue:
         # JsonArrayImpl cannot be cast to class javax.json.JsonObject
+        #
+        # comrades-dclDryLab and comrades-dclWetLab seem to be the only single compound fields
+        # on dd-dtap/provisioning/files/custom-metadata-blocks/*.tsv

         with pytest.raises(Exception) as e:
             datasets.update_metadata(data)
-        assert str(e.value) == ("Single compound fields are not supported: "
-                                "socialScienceNotes@socialScienceNotesType=p")
+        assert str(e.value) == ('Single compound fields '
+                                "[socialScienceNotes@socialScienceNotesType] are not supported : {"
+                                "'PID': 'doi:10.5072/FK2/8KQW3Y', "
+                                "'socialScienceNotes@socialScienceNotesType': 'p', "
+                                "'socialScienceNotes@socialScienceNotesSubject': 'q', "
+                                "'socialScienceNotes@socialScienceNotesText': 'r'}")
         assert capsys.readouterr().out == ''
         assert len(caplog.records) == 0

@@ -100,7 +119,10 @@ def test_update_metadata_with_too_many_values(self, caplog, capsys):
         data = {'PID': 'doi:10.5072/FK2/8KQW3Y', 'title': 'xxx', 'dansRightsHolder[0]': 'me', 'rest.column': 'you'}
         with pytest.raises(Exception) as e:
             datasets.update_metadata(data, replace=True)
-        assert str(e.value) == "Invalid typeName rest.column=you"
+        assert str(e.value) == ("Invalid typeName [rest.column] : {"
+                                "'PID': 'doi:10.5072/FK2/8KQW3Y', "
+                                "'title': 'xxx', 'dansRightsHolder[0]': 'me', "
+                                "'rest.column': 'you'}")
         assert capsys.readouterr().out == ''
         assert len(caplog.records) == 1
         assert caplog.records[0].levelname == 'DEBUG'
