Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dependency updates #398

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a71d6ae
Upgrade a number of dependencies
ianroberts Jan 21, 2024
9c0491e
Vitest upgrade changes coverage plugin from "-c8" to "-v8"
ianroberts Jan 21, 2024
412d1ed
See if adding a couple of seconds delay gives the doc format preferen…
ianroberts Jan 21, 2024
45b26e2
Put the waits either side of clicking "CSV"
ianroberts Jan 21, 2024
0e64b88
Merge pull request #397 from GateNLP/dependency-upgrades
ianroberts Jan 21, 2024
641eeaf
Include rejected/aborted/timed out annotations in export
ianroberts Feb 23, 2024
e915ee5
Copy the self.data dict when generating exports
ianroberts Feb 23, 2024
41fd491
Documentation for the teamware_status information
ianroberts Feb 23, 2024
834af01
Merge pull request #399 from GateNLP/export-rejected
ianroberts Feb 26, 2024
0cfed45
Resolves #345 fixed username anonymization
twinkarma May 25, 2023
d91a554
Use the same ANONYMIZATION_PREFIX in teamware_status section
ianroberts Feb 26, 2024
f650d87
#346 Prevent double nesting of features field
twinkarma May 25, 2023
b94e22d
#348 merge existing and new annotation fields
twinkarma May 26, 2023
d34021d
Fixed implementation of export tests
twinkarma May 26, 2023
e289227
Updated docs on how the documents and annotation are now exported
twinkarma May 26, 2023
6993266
Merge pull request #377 from GateNLP/various-export-issues
ianroberts Feb 26, 2024
53b9a56
Allow an explicit "none" for no email security rather than just relyi…
ianroberts Mar 11, 2024
ce57783
Make project search case insensitive
freddyheppell Mar 13, 2024
b67e615
Merge pull request #406 from GateNLP/case-insensitive-project-search
ianroberts Mar 13, 2024
24b025b
Make "search by username or email" on user admin page case insensitive
ianroberts Mar 13, 2024
4585c71
Merge pull request #407 from GateNLP/user-search-case-insensitive
ianroberts Mar 13, 2024
fdafb21
Merge pull request #402 from GateNLP/smtp-no-tls
ianroberts May 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 38 additions & 13 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,25 +978,28 @@ def get_doc_annotation_dict(self, json_format="raw", anonymize=True):
# Create dictionary for document
doc_dict = None
if json_format == "raw" or json_format == "csv":
doc_dict = self.data
doc_dict = self.data.copy()
elif json_format == "gate":
# GATE JSON documents are expected to have an existing "features" field
features_dict = dict(self.data["features"]) if "features" in self.data and isinstance(self.data["features"], dict) else {}

ignore_keys = {"text", self.project.document_id_field}
features_dict = {key: value for key, value in self.data.items() if key not in ignore_keys}
# Add any non-compliant top-level fields into the "features" field instead
ignore_keys = {"text", "features", "offset_type", "annotation_sets", self.project.document_id_field}
features_dict.update({key: value for key, value in self.data.items() if key not in ignore_keys})

doc_dict = {
"text": self.data["text"],
"features": features_dict,
"offset_type": "p",
"offset_type": self.data["offset_type"] if "offset_type" in self.data else "p", # Use original offset type
"name": get_value_from_key_path(self.data, self.project.document_id_field)
}
pass

# Insert annotation sets into the doc dict
annotations = self.annotations.filter(status=Annotation.COMPLETED)
if json_format == "csv":
# Gets pre-existing annotations
annotation_sets = dict(self.data["annotations"]) if "annotations" in self.data else {}
# Format annotations for CSV export
annotation_sets = {}
for annotation in annotations:
a_data = annotation.data
annotation_dict = {}
Expand All @@ -1009,36 +1012,58 @@ def get_doc_annotation_dict(self, json_format="raw", anonymize=True):
annotation_dict["duration_seconds"] = annotation.time_to_complete

if anonymize:
annotation_sets[str(annotation.user.id)] = annotation_dict
annotation_sets[f"{settings.ANONYMIZATION_PREFIX}{annotation.user.id}"] = annotation_dict
else:
annotation_sets[annotation.user.username] = annotation_dict

doc_dict["annotations"] = annotation_sets

else:
# Gets pre-existing annotations
annotation_sets = dict(self.data["annotation_sets"]) if "annotation_sets" in self.data else {}
# Format for JSON in line with GATE formatting
annotation_sets = {}
for annotation in annotations:
a_data = annotation.data
anonymized_name = f"{settings.ANONYMIZATION_PREFIX}{annotation.user.id}"
annotation_set = {
"name": annotation.user.id if anonymize else annotation.user.username,
"name": anonymized_name if anonymize else annotation.user.username,
"annotations": [
{
"type": "Document",
"start": 0,
"end": 0,
"id": 0,
"duration_seconds": annotation.time_to_complete,
"features": {
"label": a_data
}
"features": a_data
}
],
"next_annid": 1,
}
annotation_sets[annotation.user.username] = annotation_set
annotation_sets[anonymized_name if anonymize else annotation.user.username] = annotation_set

doc_dict["annotation_sets"] = annotation_sets

# Add to the export the lists (possibly empty) of users who rejected,
# timed out or aborted annotation of this document
teamware_status = {}
for key, status in [
("rejected_by", Annotation.REJECTED),
("timed_out", Annotation.TIMED_OUT),
("aborted", Annotation.ABORTED),
]:
teamware_status[key] = [
f"{settings.ANONYMIZATION_PREFIX}{annotation.user.id}" if anonymize else annotation.user.username
for annotation in self.annotations.filter(status=status)
]
if json_format == "csv":
# Flatten list if exporting as CSV
teamware_status[key] = ",".join(str(val) for val in teamware_status[key])

if json_format == "gate":
doc_dict["features"]["teamware_status"] = teamware_status
else:
doc_dict["teamware_status"] = teamware_status

return doc_dict


Expand Down
2 changes: 1 addition & 1 deletion backend/rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ def get_projects(request, current_page=1, page_size=None, filters=None):
# Perform filtering
if isinstance(filters, str):
# Search by project title if the filter is only a string
projects_query = Project.objects.filter(name__contains=filters.strip())
projects_query = Project.objects.filter(name__icontains=filters.strip())
total_count = projects_query.count()
else:
projects_query = Project.objects.all()
Expand Down
184 changes: 161 additions & 23 deletions backend/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1098,8 +1098,11 @@ def test_get_annotations_for_user_in_project(self):
class TestDocumentAnnotationModelExport(TestCase):

def setUp(self):
self.unanonymized_prefix = "namedperson"
self.test_user = get_user_model().objects.create(username="project_creator")
self.annotators = [get_user_model().objects.create(username=f"anno{i}") for i in range(3)]
self.annotator_names = [f"{self.unanonymized_prefix}{i}" for i in range(3)]
self.annotators = [get_user_model().objects.create(username=u) for u in self.annotator_names]
self.anon_annotator_names = [f"{settings.ANONYMIZATION_PREFIX}{a.id}" for a in self.annotators]
self.project = Project.objects.create(owner=self.test_user)
for i in range(10):
document = Document.objects.create(
Expand All @@ -1110,6 +1113,55 @@ def setUp(self):
"feature1": "Testvalue 1",
"feature2": "Testvalue 1",
"feature3": "Testvalue 1",
"features": {
"gate_format_feature1": "Gate feature test value",
"gate_format_feature2": "Gate feature test value",
"gate_format_feature3": "Gate feature test value",
},
"offset_type": "x",
"annotations": {
"existing_annotator1": {
"sentiment": "positive"
},
f"{settings.ANONYMIZATION_PREFIX}{self.annotators[0].pk}": {
"sentiment": "positive"
}

},
"annotation_sets": {
"existing_annotator1": {
"name": "existing_annotator1",
"annotations": [
{
"type": "Document",
"start": 0,
"end": 10,
"id": 0,
"features": {
"sentiment": "positive"
}
}
],
"next_annid": 1
},
f"{settings.ANONYMIZATION_PREFIX}{self.annotators[0].pk}": {
"name": f"{settings.ANONYMIZATION_PREFIX}{self.annotators[0].pk}",
"annotations": [
{
"type": "Document",
"start": 0,
"end": 10,
"id": 0,
"features": {
"sentiment": "positive"
}
}
],
"next_annid": 1
}

}


}
)
Expand Down Expand Up @@ -1145,51 +1197,100 @@ def setUp(self):
def test_export_raw(self):

for document in self.project.documents.all():
# Fields should remain exactly the same as what's been uploaded
# aside from annotation_sets
doc_dict = document.get_doc_annotation_dict("raw")
print(doc_dict)
self.assertTrue("id" in doc_dict)
self.assertTrue("text" in doc_dict)
self.assertTrue("feature1" in doc_dict)
self.assertTrue("feature2" in doc_dict)
self.assertTrue("feature3" in doc_dict)
self.assertTrue("features" in doc_dict)
self.assertTrue("offset_type" in doc_dict)
self.assertTrue("annotations" in doc_dict)
doc_features = doc_dict["features"]
self.assertTrue("gate_format_feature1" in doc_features)
self.assertTrue("gate_format_feature2" in doc_features)
self.assertTrue("gate_format_feature3" in doc_features)


self.check_raw_gate_annotation_formatting(doc_dict)
self.check_teamware_status(doc_dict, self.anon_annotator_names)

def test_export_gate(self):

for document in self.project.documents.all():
# All top-level fields apart from name, text, features and annotation_sets should be
# nested inside the features field
doc_dict = document.get_doc_annotation_dict("gate")
print(doc_dict)

self.assertTrue("text" in doc_dict)
self.assertTrue("features" in doc_dict)
self.assertFalse("annotations" in doc_dict)
self.assertEqual(doc_dict["offset_type"], "x")
doc_features = doc_dict["features"]
self.assertTrue("id" in doc_features)
self.assertTrue("feature1" in doc_features)
self.assertTrue("feature2" in doc_features)
self.assertTrue("feature3" in doc_features)
self.assertTrue("annotations" in doc_features)
self.assertFalse("features" in doc_features, "Double nesting of features field")
self.assertFalse("offset_type" in doc_features, "Double nesting of offset_type field")
self.assertTrue("gate_format_feature1" in doc_features)
self.assertTrue("gate_format_feature2" in doc_features)
self.assertTrue("gate_format_feature3" in doc_features)

self.check_raw_gate_annotation_formatting(doc_dict)
self.check_teamware_status(doc_features, self.anon_annotator_names)

def test_export_gate_with_no_offset_type(self):

for document in self.project.documents.all():
document.data.pop("offset_type")

def check_raw_gate_annotation_formatting(self, doc_dict):
doc_dict = document.get_doc_annotation_dict("gate")
self.assertEqual(doc_dict["offset_type"], "p", "offset_type should default to p")


def check_raw_gate_annotation_formatting(self, doc_dict: dict):
self.assertTrue("annotation_sets" in doc_dict)
self.assertTrue(len(doc_dict["annotation_sets"]) == 3)
self.assertEqual(len(doc_dict["annotation_sets"]), 4, doc_dict)

# Test annotation formatting
for aset_key, aset_data in doc_dict["annotation_sets"].items():
self.assertTrue("name" in aset_data)
self.assertTrue("annotations" in aset_data)
self.assertEqual(len(aset_data["annotations"]), 1)
anno_dict = aset_data["annotations"][0]
self.assertTrue("type" in anno_dict)
self.assertTrue("start" in anno_dict)
self.assertTrue("end" in anno_dict)
self.assertTrue("id" in anno_dict)
self.assertTrue("features" in anno_dict)
self.assertTrue("label" in anno_dict["features"])
label_dict = anno_dict["features"]["label"]
self.assertTrue("text1" in label_dict)
self.assertTrue("checkbox1" in label_dict)
if aset_key != "existing_annotator1":
self.assertTrue("name" in aset_data)
self.assertTrue("annotations" in aset_data)
self.assertEqual(len(aset_data["annotations"]), 1)
anno_dict = aset_data["annotations"][0]
self.assertTrue("type" in anno_dict)
self.assertTrue("start" in anno_dict)
self.assertTrue("end" in anno_dict)
self.assertTrue("id" in anno_dict)
self.assertTrue("features" in anno_dict)
features_dict = anno_dict["features"]
self.assertTrue("text1" in features_dict)
self.assertTrue("checkbox1" in features_dict)
else:
# Check that existing annotation from document upload is carried over
self.assertEqual(aset_data["annotations"][0]["features"]["sentiment"], "positive")




def check_teamware_status(self, containing_dict, expected_value):
self.assertTrue("teamware_status" in containing_dict)
teamware_status = containing_dict["teamware_status"]
if isinstance(expected_value, str):
self.assertEqual(teamware_status["rejected_by"], expected_value)
self.assertEqual(teamware_status["aborted"], expected_value)
self.assertEqual(teamware_status["timed_out"], expected_value)
else:
self.assertSetEqual(set(teamware_status["rejected_by"]), set(expected_value))
self.assertSetEqual(set(teamware_status["aborted"]), set(expected_value))
self.assertSetEqual(set(teamware_status["timed_out"]), set(expected_value))

def test_export_csv(self):

Expand All @@ -1203,40 +1304,77 @@ def test_export_csv(self):
self.assertTrue("feature2" in doc_dict)
self.assertTrue("feature3" in doc_dict)
self.assertTrue("annotations" in doc_dict)
self.assertTrue(len(doc_dict["annotations"]) == 3)
self.assertEqual(len(doc_dict["annotations"]), 4, doc_dict)
anno_set_dict = doc_dict["annotations"]
for set_key in anno_set_dict:
self.assertTrue(isinstance(anno_set_dict[set_key]["text1"], str))
self.assertTrue(isinstance(anno_set_dict[set_key]["checkbox1"], str))
if set_key != "existing_annotator1":
self.assertTrue(isinstance(anno_set_dict[set_key]["text1"], str))
self.assertTrue(isinstance(anno_set_dict[set_key]["checkbox1"], str))
else:
self.assertEqual(anno_set_dict[set_key]["sentiment"], "positive")

self.check_teamware_status(doc_dict, ",".join(str(i) for i in self.anon_annotator_names))

def test_export_raw_anonymized(self):

for document in self.project.documents.all():
# Mask any existing annotations that came with the document upload
document.data.pop("annotation_sets")
document.save()

doc_dict = document.get_doc_annotation_dict("raw", anonymize=True)

for aset_key, aset_data in doc_dict["annotation_sets"].items():
self.assertTrue(isinstance(aset_data.get("name", None), int))
self.assertFalse(aset_key.startswith(self.unanonymized_prefix))
self.assertFalse(aset_data.get("name", None).startswith(self.unanonymized_prefix))

self.check_teamware_status(doc_dict, self.anon_annotator_names)

def test_export_raw_deanonymized(self):

for document in self.project.documents.all():
# Mask any existing annotations that came with the document upload
document.data.pop("annotation_sets")
document.save()

doc_dict = document.get_doc_annotation_dict("raw", anonymize=False)

for aset_key, aset_data in doc_dict["annotation_sets"].items():
self.assertTrue(isinstance(aset_data.get("name", None), str))
self.assertTrue(aset_key.startswith(self.unanonymized_prefix))
self.assertTrue(aset_data.get("name", None).startswith(self.unanonymized_prefix))

# For non-anonymized export the rejected/aborted/timed_out status
# uses usernames rather than anonymized (prefixed) user IDs
self.check_teamware_status(doc_dict, self.annotator_names)

def test_export_gate_anonymized(self):

for document in self.project.documents.all():
# Mask any existing annotations that came with the document upload
document.data.pop("annotation_sets")
document.save()

doc_dict = document.get_doc_annotation_dict("gate", anonymize=True)

for aset_key, aset_data in doc_dict["annotation_sets"].items():
self.assertTrue(isinstance(aset_data.get("name", None), int))
self.assertFalse(aset_key.startswith(self.unanonymized_prefix))
self.assertFalse(aset_data.get("name", None).startswith(self.unanonymized_prefix))

self.check_teamware_status(doc_dict["features"], self.anon_annotator_names)

def test_export_gate_deanonymized(self):

for document in self.project.documents.all():
# Mask any existing annotations that came with the document upload
document.data.pop("annotation_sets")
document.save()

doc_dict = document.get_doc_annotation_dict("gate", anonymize=False)

for aset_key, aset_data in doc_dict["annotation_sets"].items():
self.assertTrue(isinstance(aset_data.get("name", None), str))
self.assertTrue(aset_key.startswith(self.unanonymized_prefix))
self.assertTrue(aset_data.get("name", None).startswith(self.unanonymized_prefix))

# For non-anonymized export the rejected/aborted/timed_out status
# uses usernames rather than anonymized (prefixed) user IDs
self.check_teamware_status(doc_dict["features"], self.annotator_names)
4 changes: 4 additions & 0 deletions backend/tests/test_rpc_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,10 @@ def test_get_projects(self):
self.assertEqual(len(result["items"]), 1)
self.assertEqual(result["total_count"], 1)

# Ensure filtering is case-insensitive
result = get_projects(self.get_loggedin_request(), 1, page_size, "pROJECT 1")
self.assertEqual(len(result["items"]), 1)
self.assertEqual(result["total_count"], 1)



Expand Down
Loading
Loading