From 53f24a82b8490e473268a37eaf30bcc3737cfa9e Mon Sep 17 00:00:00 2001 From: dustine32 Date: Wed, 4 Oct 2023 12:06:16 -0700 Subject: [PATCH] Handle GPI label merge for geneontology/go-releases#50 --- ontobio/model/collections.py | 16 ++++++++++++++-- tests/test_collections.py | 9 +++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/ontobio/model/collections.py b/ontobio/model/collections.py index 392c53a1..2ffb5205 100644 --- a/ontobio/model/collections.py +++ b/ontobio/model/collections.py @@ -17,9 +17,21 @@ class BioEntities: def merge(self, other): """ Merge another BioEntity set into this one. The `other` set will - override any collisions in this BioEntities + override any collisions in this BioEntities, except that an entity + whose label in `other` is blank or None keeps its existing label """ - self.entities.update(other.entities) + # Merge per entity rather than dict.update() so a blank label in other cannot clobber an existing one + for ent in other.entities: + if ent in self.entities: + # Handle specific field merges here; NOTE: merged_ent aliases other's entity, so edits below also mutate other + merged_ent = other.entities[ent] + + # Carry forward existing label if other label is blank string or None + if not merged_ent.label: + merged_ent.label = self.entities[ent].label + self.entities[ent] = merged_ent + else: + self.entities[ent] = other.entities[ent] return self def get(self, entity_id: Curie) -> Optional[Subject]: diff --git a/tests/test_collections.py b/tests/test_collections.py index eb4fd018..df03dbdc 100644 --- a/tests/test_collections.py +++ b/tests/test_collections.py @@ -26,6 +26,15 @@ def test_bioentities_merge(): Curie("BAR", "987"): Subject(Curie("BAR", "987"), "goodbye", "world", [], "protien", Curie("NCBITaxon", "999")) }) + # Test that blank label does not overwrite existing label + p = collections.BioEntities({ + Curie("BAR", "987"): Subject(Curie("BAR", "987"), "", "world", [], "protien", Curie("NCBITaxon", "999")) + }) + + assert o.merge(p) == collections.BioEntities({ + Curie("BAR", "987"): Subject(Curie("BAR", "987"), "goodbye", "world", [], "protien", Curie("NCBITaxon", 
"999")) + }) + def test_bioentities_merge_clobber(): e = collections.BioEntities({ Curie("FOO", "123"): Subject(Curie("FOO", "123"), "hello", "world", [], "protien", Curie("NCBITaxon", "12345"))