GateNLP · ianroberts · Jan 21, 2024 · Jan 21, 2024 · Jan 21, 2024 · Jan 21, 2024
diff --git a/backend/models.py b/backend/models.py
@@ -978,25 +978,28 @@ def get_doc_annotation_dict(self, json_format="raw", anonymize=True):
         # Create dictionary for document
         doc_dict = None
         if json_format == "raw" or json_format == "csv":
-            doc_dict = self.data
+            doc_dict = self.data.copy()
         elif json_format == "gate":
+            # GATE JSON format documents are expected to have an existing "features" field
+            features_dict = dict(self.data["features"]) if "features" in self.data and isinstance(self.data["features"], dict) else {}
 
-            ignore_keys = {"text", self.project.document_id_field}
-            features_dict = {key: value for key, value in self.data.items() if key not in ignore_keys}
+            # Add any non-compliant top-level fields into the "features" field instead
+            ignore_keys = {"text", "features", "offset_type", "annotation_sets", self.project.document_id_field}
+            features_dict.update({key: value for key, value in self.data.items() if key not in ignore_keys})
 
             doc_dict = {
                 "text": self.data["text"],
                 "features": features_dict,
-                "offset_type": "p",
+                "offset_type": self.data["offset_type"] if "offset_type" in self.data else "p",  # Use original offset type
                 "name": get_value_from_key_path(self.data, self.project.document_id_field)
             }
-            pass
 
         # Insert annotation sets into the doc dict
         annotations = self.annotations.filter(status=Annotation.COMPLETED)
         if json_format == "csv":
+            # Gets pre-existing annotations
+            annotation_sets = dict(self.data["annotations"]) if "annotations" in self.data else {}
             # Format annotations for CSV export
-            annotation_sets = {}
             for annotation in annotations:
                 a_data = annotation.data
                 annotation_dict = {}
@@ -1009,36 +1012,58 @@ def get_doc_annotation_dict(self, json_format="raw", anonymize=True):
                 annotation_dict["duration_seconds"] = annotation.time_to_complete
 
                 if anonymize:
-                    annotation_sets[str(annotation.user.id)] = annotation_dict
+                    annotation_sets[f"{settings.ANONYMIZATION_PREFIX}{annotation.user.id}"] = annotation_dict
                 else:
                     annotation_sets[annotation.user.username] = annotation_dict
 
             doc_dict["annotations"] = annotation_sets
 
         else:
+            # Gets pre-existing annotations
+            annotation_sets = dict(self.data["annotation_sets"]) if "annotation_sets" in self.data else {}
             # Format for JSON in line with GATE formatting
-            annotation_sets = {}
             for annotation in annotations:
                 a_data = annotation.data
+                anonymized_name = f"{settings.ANONYMIZATION_PREFIX}{annotation.user.id}"
                 annotation_set = {
-                    "name": annotation.user.id if anonymize else annotation.user.username,
+                    "name": anonymized_name if anonymize else annotation.user.username,
                     "annotations": [
                         {
                             "type": "Document",
                             "start": 0,
                             "end": 0,
                             "id": 0,
                             "duration_seconds": annotation.time_to_complete,
-                            "features": {
-                                "label": a_data
-                            }
+                            "features": a_data
                         }
                     ],
                     "next_annid": 1,
                 }
-                annotation_sets[annotation.user.username] = annotation_set
+                annotation_sets[anonymized_name if anonymize else annotation.user.username] = annotation_set
+
             doc_dict["annotation_sets"] = annotation_sets
 
+        # Add to the export the lists (possibly empty) of users who rejected,
+        # timed out or aborted annotation of this document
+        teamware_status = {}
+        for key, status in [
+            ("rejected_by", Annotation.REJECTED),
+            ("timed_out", Annotation.TIMED_OUT),
+            ("aborted", Annotation.ABORTED),
+        ]:
+            teamware_status[key] = [
+                f"{settings.ANONYMIZATION_PREFIX}{annotation.user.id}" if anonymize else annotation.user.username
+                for annotation in self.annotations.filter(status=status)
+            ]
+            if json_format == "csv":
+                # Flatten list if exporting as CSV
+                teamware_status[key] = ",".join(str(val) for val in teamware_status[key])
+
+        if json_format == "gate":
+            doc_dict["features"]["teamware_status"] = teamware_status
+        else:
+            doc_dict["teamware_status"] = teamware_status
+
         return doc_dict
 
 

diff --git a/backend/rpc.py b/backend/rpc.py
@@ -510,7 +510,7 @@ def get_projects(request, current_page=1, page_size=None, filters=None):
     # Perform filtering
     if isinstance(filters, str):
         # Search project title if is filter is a string only
-        projects_query = Project.objects.filter(name__contains=filters.strip())
+        projects_query = Project.objects.filter(name__icontains=filters.strip())
         total_count = projects_query.count()
     else:
         projects_query = Project.objects.all()

diff --git a/backend/tests/test_models.py b/backend/tests/test_models.py
@@ -1098,8 +1098,11 @@ def test_get_annotations_for_user_in_project(self):
 class TestDocumentAnnotationModelExport(TestCase):
 
     def setUp(self):
+        self.unanonymized_prefix = "namedperson"
         self.test_user = get_user_model().objects.create(username="project_creator")
-        self.annotators = [get_user_model().objects.create(username=f"anno{i}") for i in range(3)]
+        self.annotator_names = [f"{self.unanonymized_prefix}{i}" for i in range(3)]
+        self.annotators = [get_user_model().objects.create(username=u) for u in self.annotator_names]
+        self.anon_annotator_names = [f"{settings.ANONYMIZATION_PREFIX}{a.id}" for a in self.annotators]
         self.project = Project.objects.create(owner=self.test_user)
         for i in range(10):
             document = Document.objects.create(
@@ -1110,6 +1113,55 @@ def setUp(self):
                     "feature1": "Testvalue 1",
                     "feature2": "Testvalue 1",
                     "feature3": "Testvalue 1",
+                    "features": {
+                        "gate_format_feature1": "Gate feature test value",
+                        "gate_format_feature2": "Gate feature test value",
+                        "gate_format_feature3": "Gate feature test value",
+                    },
+                    "offset_type": "x",
+                    "annotations": {
+                        "existing_annotator1": {
+                            "sentiment": "positive"
+                        },
+                        f"{settings.ANONYMIZATION_PREFIX}{self.annotators[0].pk}": {
+                            "sentiment": "positive"
+                        }
+
+                    },
+                    "annotation_sets": {
+                        "existing_annotator1": {
+                            "name": "existing_annotator1",
+                            "annotations": [
+                                {
+                                    "type": "Document",
+                                    "start": 0,
+                                    "end": 10,
+                                    "id": 0,
+                                    "features": {
+                                        "sentiment": "positive"
+                                    }
+                                }
+                            ],
+                            "next_annid": 1
+                        },
+                        f"{settings.ANONYMIZATION_PREFIX}{self.annotators[0].pk}": {
+                            "name": f"{settings.ANONYMIZATION_PREFIX}{self.annotators[0].pk}",
+                            "annotations": [
+                                {
+                                    "type": "Document",
+                                    "start": 0,
+                                    "end": 10,
+                                    "id": 0,
+                                    "features": {
+                                        "sentiment": "positive"
+                                    }
+                                }
+                            ],
+                            "next_annid": 1
+                        }
+
+                    }
+
 
                 }
             )
@@ -1145,51 +1197,100 @@ def setUp(self):
     def test_export_raw(self):
 
         for document in self.project.documents.all():
+            # Fields should remain exactly the same as what's been uploaded
+            # aside from annotation_sets
             doc_dict = document.get_doc_annotation_dict("raw")
             print(doc_dict)
             self.assertTrue("id" in doc_dict)
             self.assertTrue("text" in doc_dict)
             self.assertTrue("feature1" in doc_dict)
             self.assertTrue("feature2" in doc_dict)
             self.assertTrue("feature3" in doc_dict)
+            self.assertTrue("features" in doc_dict)
+            self.assertTrue("offset_type" in doc_dict)
+            self.assertTrue("annotations" in doc_dict)
+            doc_features = doc_dict["features"]
+            self.assertTrue("gate_format_feature1" in doc_features)
+            self.assertTrue("gate_format_feature2" in doc_features)
+            self.assertTrue("gate_format_feature3" in doc_features)
+
 
             self.check_raw_gate_annotation_formatting(doc_dict)
+            self.check_teamware_status(doc_dict, self.anon_annotator_names)
 
     def test_export_gate(self):
 
         for document in self.project.documents.all():
+            # All top-level fields apart from name, text, features and annotation_sets should be
+            # nested inside the features field
             doc_dict = document.get_doc_annotation_dict("gate")
             print(doc_dict)
 
             self.assertTrue("text" in doc_dict)
             self.assertTrue("features" in doc_dict)
+            self.assertFalse("annotations" in doc_dict)
+            self.assertEqual(doc_dict["offset_type"], "x")
             doc_features = doc_dict["features"]
             self.assertTrue("id" in doc_features)
             self.assertTrue("feature1" in doc_features)
             self.assertTrue("feature2" in doc_features)
             self.assertTrue("feature3" in doc_features)
+            self.assertTrue("annotations" in doc_features)
+            self.assertFalse("features" in doc_features, "Double nesting of features field")
+            self.assertFalse("offset_type" in doc_features, "Double nesting of offset_type field")
+            self.assertTrue("gate_format_feature1" in doc_features)
+            self.assertTrue("gate_format_feature2" in doc_features)
+            self.assertTrue("gate_format_feature3" in doc_features)
 
             self.check_raw_gate_annotation_formatting(doc_dict)
+            self.check_teamware_status(doc_features, self.anon_annotator_names)
+
+    def test_export_gate_with_no_offset_type(self):
+
+        for document in self.project.documents.all():
+            document.data.pop("offset_type")
 
-    def check_raw_gate_annotation_formatting(self, doc_dict):
+            doc_dict = document.get_doc_annotation_dict("gate")
+            self.assertEqual(doc_dict["offset_type"], "p", "offset_type should default to p")
+
+
+    def check_raw_gate_annotation_formatting(self, doc_dict: dict):
         self.assertTrue("annotation_sets" in doc_dict)
-        self.assertTrue(len(doc_dict["annotation_sets"]) == 3)
+        self.assertEqual(len(doc_dict["annotation_sets"]), 4, doc_dict)
 
         # Test annotation formatting
         for aset_key, aset_data in doc_dict["annotation_sets"].items():
-            self.assertTrue("name" in aset_data)
-            self.assertTrue("annotations" in aset_data)
-            self.assertEqual(len(aset_data["annotations"]), 1)
-            anno_dict = aset_data["annotations"][0]
-            self.assertTrue("type" in anno_dict)
-            self.assertTrue("start" in anno_dict)
-            self.assertTrue("end" in anno_dict)
-            self.assertTrue("id" in anno_dict)
-            self.assertTrue("features" in anno_dict)
-            self.assertTrue("label" in anno_dict["features"])
-            label_dict = anno_dict["features"]["label"]
-            self.assertTrue("text1" in label_dict)
-            self.assertTrue("checkbox1" in label_dict)
+            if aset_key != "existing_annotator1":
+                self.assertTrue("name" in aset_data)
+                self.assertTrue("annotations" in aset_data)
+                self.assertEqual(len(aset_data["annotations"]), 1)
+                anno_dict = aset_data["annotations"][0]
+                self.assertTrue("type" in anno_dict)
+                self.assertTrue("start" in anno_dict)
+                self.assertTrue("end" in anno_dict)
+                self.assertTrue("id" in anno_dict)
+                self.assertTrue("features" in anno_dict)
+                features_dict = anno_dict["features"]
+                self.assertTrue("text1" in features_dict)
+                self.assertTrue("checkbox1" in features_dict)
+            else:
+                # Check that existing annotation from document upload is carried over
+                self.assertEqual(aset_data["annotations"][0]["features"]["sentiment"], "positive")
+
+
+
+
+    def check_teamware_status(self, containing_dict, expected_value):
+        self.assertTrue("teamware_status" in containing_dict)
+        teamware_status = containing_dict["teamware_status"]
+        if isinstance(expected_value, str):
+            self.assertEqual(teamware_status["rejected_by"], expected_value)
+            self.assertEqual(teamware_status["aborted"], expected_value)
+            self.assertEqual(teamware_status["timed_out"], expected_value)
+        else:
+            self.assertSetEqual(set(teamware_status["rejected_by"]), set(expected_value))
+            self.assertSetEqual(set(teamware_status["aborted"]), set(expected_value))
+            self.assertSetEqual(set(teamware_status["timed_out"]), set(expected_value))
 
     def test_export_csv(self):
 
@@ -1203,40 +1304,77 @@ def test_export_csv(self):
             self.assertTrue("feature2" in doc_dict)
             self.assertTrue("feature3" in doc_dict)
             self.assertTrue("annotations" in doc_dict)
-            self.assertTrue(len(doc_dict["annotations"]) == 3)
+            self.assertEqual(len(doc_dict["annotations"]), 4, doc_dict)
             anno_set_dict = doc_dict["annotations"]
             for set_key in anno_set_dict:
-                self.assertTrue(isinstance(anno_set_dict[set_key]["text1"], str))
-                self.assertTrue(isinstance(anno_set_dict[set_key]["checkbox1"], str))
+                if set_key != "existing_annotator1":
+                    self.assertTrue(isinstance(anno_set_dict[set_key]["text1"], str))
+                    self.assertTrue(isinstance(anno_set_dict[set_key]["checkbox1"], str))
+                else:
+                    self.assertEqual(anno_set_dict[set_key]["sentiment"], "positive")
+
+            self.check_teamware_status(doc_dict, ",".join(str(i) for i in self.anon_annotator_names))
 
     def test_export_raw_anonymized(self):
 
         for document in self.project.documents.all():
+            # Mask any existing annotations that came with the document upload
+            document.data.pop("annotation_sets")
+            document.save()
+
             doc_dict = document.get_doc_annotation_dict("raw", anonymize=True)
 
             for aset_key, aset_data in doc_dict["annotation_sets"].items():
-                self.assertTrue(isinstance(aset_data.get("name", None), int))
+                self.assertFalse(aset_key.startswith(self.unanonymized_prefix))
+                self.assertFalse(aset_data.get("name", None).startswith(self.unanonymized_prefix))
+
+            self.check_teamware_status(doc_dict, self.anon_annotator_names)
 
     def test_export_raw_deanonymized(self):
 
         for document in self.project.documents.all():
+            # Mask any existing annotations that came with the document upload
+            document.data.pop("annotation_sets")
+            document.save()
+
             doc_dict = document.get_doc_annotation_dict("raw", anonymize=False)
 
             for aset_key, aset_data in doc_dict["annotation_sets"].items():
-                self.assertTrue(isinstance(aset_data.get("name", None), str))
+                self.assertTrue(aset_key.startswith(self.unanonymized_prefix))
+                self.assertTrue(aset_data.get("name", None).startswith(self.unanonymized_prefix))
+
+            # for non-anonymized export the rejected/aborted/timed_out status
+            # uses names rather than ID numbers
+            self.check_teamware_status(doc_dict, self.annotator_names)
 
     def test_export_gate_anonymized(self):
 
         for document in self.project.documents.all():
+            # Mask any existing annotations that came with the document upload
+            document.data.pop("annotation_sets")
+            document.save()
+
             doc_dict = document.get_doc_annotation_dict("gate", anonymize=True)
 
             for aset_key, aset_data in doc_dict["annotation_sets"].items():
-                self.assertTrue(isinstance(aset_data.get("name", None), int))
+                self.assertFalse(aset_key.startswith(self.unanonymized_prefix))
+                self.assertFalse(aset_data.get("name", None).startswith(self.unanonymized_prefix))
+
+            self.check_teamware_status(doc_dict["features"], self.anon_annotator_names)
 
     def test_export_gate_deanonymized(self):
 
         for document in self.project.documents.all():
+            # Mask any existing annotations that came with the document upload
+            document.data.pop("annotation_sets")
+            document.save()
+
             doc_dict = document.get_doc_annotation_dict("gate", anonymize=False)
 
             for aset_key, aset_data in doc_dict["annotation_sets"].items():
-                self.assertTrue(isinstance(aset_data.get("name", None), str))
+                self.assertTrue(aset_key.startswith(self.unanonymized_prefix))
+                self.assertTrue(aset_data.get("name", None).startswith(self.unanonymized_prefix))
+
+            # for non-anonymized export the rejected/aborted/timed_out status
+            # uses names rather than ID numbers
+            self.check_teamware_status(doc_dict["features"], self.annotator_names)
diff --git a/backend/tests/test_rpc_endpoints.py b/backend/tests/test_rpc_endpoints.py
@@ -620,6 +620,10 @@ def test_get_projects(self):
         self.assertEqual(len(result["items"]), 1)
         self.assertEqual(result["total_count"], 1)
 
+        # Ensure filtering is case-insensitive
+        result = get_projects(self.get_loggedin_request(), 1, page_size, "pROJECT 1")
+        self.assertEqual(len(result["items"]), 1)
+        self.assertEqual(result["total_count"], 1)