From 90ec32f4d98a30b4cd679f5e426bc26fea41672c Mon Sep 17 00:00:00 2001 From: Keigh Rim Date: Thu, 6 Jun 2024 20:02:06 -0400 Subject: [PATCH] Mmif.__getitem__ now search for an annotation with short ID --- mmif/serialize/mmif.py | 32 ++++++++++++++++++++------------ tests/test_serialize.py | 23 +++++++++++++++++++++++ 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/mmif/serialize/mmif.py b/mmif/serialize/mmif.py index 66cd12a6..62112038 100644 --- a/mmif/serialize/mmif.py +++ b/mmif/serialize/mmif.py @@ -661,30 +661,38 @@ def get_end(self, annotation: Annotation) -> Union[int, float]: def __getitem__(self, item: str) \ -> Union[Document, View, Annotation, MmifMetadata, DocumentsList, ViewsList]: """ - getitem implementation for Mmif. When nothing is found, this will raise an error - rather than returning a None (although pytype doesn't think so...) + getitem implementation for Mmif. This will try to find any object, given an identifier or an immediate + attribute name. When nothing is found, this will raise an error rather than returning a None :raises KeyError: if the item is not found or if the search results are ambiguous - :param item: the search string, a document ID, a view ID, or a view-scoped annotation ID + :param item: an attribute name or an object identifier (a document ID, a view ID, or an annotation ID). When + an annotation ID is given as a "short" ID (without view ID prefix), every view is searched, and a + KeyError is raised if more than one object matches that ID. 
:return: the object searched for + :raise KeyError: if the item is not found or multiple objects are found with the same ID """ if item in self._named_attributes(): return self.__dict__[item] split_attempt = item.split(self.id_delimiter) - document_result = self.documents.get(split_attempt[0]) - view_result = self.views.get(split_attempt[0]) + found = [] if len(split_attempt) == 1: - anno_result = None - elif view_result: - anno_result = view_result[split_attempt[1]] + found.append(self.documents.get(split_attempt[0])) + found.append(self.views.get(split_attempt[0])) + for view in self.views: + found.append(view.annotations.get(split_attempt[0])) + elif len(split_attempt) == 2: + v = self.get_view_by_id(split_attempt[0]) + if v is not None: + found.append(v.annotations.get(split_attempt[1])) else: raise KeyError("Tried to subscript into a view that doesn't exist") + found = [x for x in found if x is not None] - if view_result and document_result: + if len(found) > 1: raise KeyError("Ambiguous ID search result") - if not (view_result or document_result): + elif len(found) == 0: raise KeyError("ID not found: %s" % item) - return anno_result or view_result or document_result - + else: + return found[-1] diff --git a/tests/test_serialize.py b/tests/test_serialize.py index 3079d0e4..b83174b6 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -516,6 +516,29 @@ def test_mmif_getitem_document(self): except KeyError: self.fail("didn't get document 'm1'") + def test_mmif_getitem_idconflict(self): + m = Mmif(validate=False) + v1 = m.new_view() + v1.id = 'v1' + v2 = m.new_view() + v2.id = 'v1' + with pytest.raises(KeyError): + _ = m['v1'] + + m = Mmif(validate=False) + v1 = m.new_view() + v1a = v1.new_annotation(AnnotationTypes.Annotation, id='a1') + v2 = m.new_view() + v2a = v2.new_annotation(AnnotationTypes.Annotation, id='a1') + self.assertIsNotNone(m[v1.id]) + self.assertIsNotNone(m[v2.id]) + # conflict short IDs + self.assertEqual(v1a.id, v2a.id) + with 
pytest.raises(KeyError): + _ = m[v1a.id] + self.assertIsNotNone(m[v1a.long_id]) + self.assertIsNotNone(m[v2a.long_id]) + def test_mmif_getitem_view(self): try: v1 = self.mmif_obj['v1']