From c485f063b5c40093269a85523a1c500d9efd4948 Mon Sep 17 00:00:00 2001 From: jbukhari Date: Tue, 17 Dec 2024 18:17:30 -0500 Subject: [PATCH 1/2] ensure null values are not indexed in text collections --- dlx/marc/__init__.py | 2 +- dlx/scripts/build_text_collections.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dlx/marc/__init__.py b/dlx/marc/__init__.py index ace6197..5eabeea 100644 --- a/dlx/marc/__init__.py +++ b/dlx/marc/__init__.py @@ -840,7 +840,7 @@ def index_field_text(*, threaded=True): {'_id': text}, {'$addToSet': {'subfields': {'code': subfield.code, 'value': subfield.value}}}, upsert=True - ) for subfield in field.subfields + ) for subfield in field.subfields if subfield.value ] words = Tokenizer.tokenize(text) diff --git a/dlx/scripts/build_text_collections.py b/dlx/scripts/build_text_collections.py index 038fec1..04d1e71 100644 --- a/dlx/scripts/build_text_collections.py +++ b/dlx/scripts/build_text_collections.py @@ -67,7 +67,7 @@ def run(): # add to seen index seen.setdefault(field.tag, {}) seen[field.tag].setdefault(text, {}) - seen[field.tag][text]['subfields'] = [{'code': x.code, 'value': x.value} for x in field.subfields] + seen[field.tag][text]['subfields'] = [{'code': x.code, 'value': x.value} for x in field.subfields if x.value] updates.setdefault(field.tag, []) From 8694be37102d800e74175d9739bfec44bb3238d3 Mon Sep 17 00:00:00 2001 From: jbukhari Date: Wed, 18 Dec 2024 13:41:16 -0500 Subject: [PATCH 2/2] auth delete subfield trigger delete in linked records --- dlx/marc/__init__.py | 32 ++++++++++++++++++++------------ tests/test_marc.py | 11 +++++++++++ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/dlx/marc/__init__.py b/dlx/marc/__init__.py index 5eabeea..bdbb81b 100644 --- a/dlx/marc/__init__.py +++ b/dlx/marc/__init__.py @@ -769,7 +769,7 @@ def validate(self): except jsonschema.exceptions.ValidationError as e: msg = '{} in {} : {}'.format(e.message, str(list(e.path)), self.to_json()) raise 
jsonschema.exceptions.ValidationError(msg) - + @Decorators.check_connected def commit(self, user='admin', auth_check=True, update_attached=True): new_record = True if self.id is None else False @@ -1038,20 +1038,28 @@ def update_attached_records(auth): for record in auth.list_attached(): def do_update(): try: - if isinstance(record, Auth): + if isinstance(record, Auth) and auth.id in [x.id for x in record.list_attached(usage_type='auth')]: # prevent feedback loops - if auth.id in [x.id for x in record.list_attached(usage_type='auth')]: - record.commit(user=auth.user, auth_check=False, update_attached=False) - return + record.commit(user=auth.user, auth_check=False, update_attached=False) + return # if the heading field tag changed, change the tag in linked record - if old_tag := Auth(previous_state).heading_field.tag: - if old_tag != self.heading_field.tag: - for field in record.datafields: - for subfield in filter(lambda x: hasattr(x, 'xref'), field.subfields): - if subfield.xref == self.id: - new_tag = field.tag[0] + self.heading_field.tag[1:] - field.tag = new_tag + if self.heading_field.tag != Auth(previous_state).heading_field.tag: + for field in record.datafields: + for subfield in filter(lambda x: hasattr(x, 'xref'), field.subfields): + if subfield.xref == self.id: + new_tag = field.tag[0] + self.heading_field.tag[1:] + field.tag = new_tag + + # if any subfields have been deleted, delete them from the linked record + codes_removed = [] + + for subfield in Auth(previous_state).heading_field.subfields: + if subfield.code not in [x.code for x in self.heading_field.subfields]: + codes_removed.append(subfield.code) + + for linked_field in [x for x in record.fields if isinstance(x, Datafield)]: + linked_field.subfields = [x for x in linked_field.subfields if x.code not in codes_removed] record.commit(user=auth.user, auth_check=False) except Exception as err: diff --git a/tests/test_marc.py b/tests/test_marc.py index ef93d5b..43775f2 100644 --- a/tests/test_marc.py 
+++ b/tests/test_marc.py @@ -164,6 +164,17 @@ def test_commit(db, bibs, auths): assert linked_bib.get_field('600') assert linked_bib.get_field('650') is None + # subfield deleted + auth = Auth() + auth.set('100', 'a', 'will not be deleted').set('100', 'g', 'subfield to be deleted').commit() + bib = Bib() + bib.set('600', 'a', auth.id).set('600', 'g', auth.id).commit() + auth.heading_field.subfields = [x for x in auth.heading_field.subfields if x.code != 'g'] + auth.commit() + bib = Bib.from_id(bib.id) # re-retrieve the updated data from the db that was updated in the background + assert len([x for x in auth.heading_field.subfields]) == 1 + assert len([x for x in bib.get_field('600').subfields]) == 1 + def test_delete(db): from dlx import DB from dlx.marc import Bib