From f1340dc43bfc1496c476257bc8c5d665213d3e9a Mon Sep 17 00:00:00 2001 From: william Date: Mon, 9 Sep 2024 09:56:53 +1000 Subject: [PATCH 01/11] add external subject/specimen id --- .../stacks/metadata-manager/README.md | 34 ++++++------ ...oricalspecimen_lab_specimen_id_and_more.py | 53 +++++++++++++++++++ .../app/models/lab/specimen.py | 11 ++-- .../app/models/lab/subject.py | 6 ++- .../metadata-manager/docs/schema.drawio.svg | 2 +- .../proc/service/tracking_sheet_srv.py | 14 ++--- .../proc/tests/test_tracking_sheet_srv.py | 20 +++---- 7 files changed, 102 insertions(+), 38 deletions(-) create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_rename_specimen_id_historicalspecimen_lab_specimen_id_and_more.py diff --git a/lib/workload/stateless/stacks/metadata-manager/README.md b/lib/workload/stateless/stacks/metadata-manager/README.md index 77242c0e0..81720da00 100644 --- a/lib/workload/stateless/stacks/metadata-manager/README.md +++ b/lib/workload/stateless/stacks/metadata-manager/README.md @@ -32,8 +32,8 @@ An example of how to use a curl command to access the production API: curl -s -H "Authorization: Bearer $ORCABUS_TOKEN" "https://metadata.umccr.org/api/v1/library" | jq ``` -Filtering of results is also supported by the API. For example, to filter by `internal_id`, append the query parameter -to the URL: `.../library?library_id=LIB001` +Filtering of results is also supported by the API. For example, to filter by `libraryId`, append the query parameter +to the URL: `.../library?libraryId=LIB001` ## Schema @@ -57,20 +57,22 @@ In the near future, we might introduce different ways to load data into the appl loading data from the Google tracking sheet and mapping it to its respective model as follows. -| Sheet Header | Table | Field Name | -|--------------|------------|---------------| -| SubjectID | `Subject` | subject_id | -| SampleID | `Specimen` | sample_id | -| Source | `Specimen` | source | -| LibraryID | `Library` | library_id | -| Phenotype | `Library` | phenotype | -| Workflow | `Library` | workflow | -| Quality | `Library` | quality | -| Type | `Library` | type | -| Coverage (X) | `Library` | coverage | -| Assay | `Library` | assay | -| ProjectOwner | `Library` | project_owner | -| ProjectName | `Library` | project_name | +| Sheet Header | Table | Field Name | +|-------------------|------------|---------------------| +| SubjectID | `Subject` | lab_subject_id | +| ExternalSubjectID | `Subject` | external_subject_id | +| SampleID | `Specimen` | sample_id | +| ExternalSampleID | `Specimen` | external_sample_id | +| Source | `Specimen` | source | +| LibraryID | `Library` | library_id | +| Phenotype | `Library` | phenotype | +| Workflow | `Library` | workflow | +| Quality | `Library` | quality | +| Type | `Library` | type | +| Coverage (X) | `Library` | coverage | +| Assay | `Library` | assay | +| ProjectOwner | `Library` | project_owner | +| ProjectName | `Library` | project_name | Some important notes of the sync: diff --git a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_rename_specimen_id_historicalspecimen_lab_specimen_id_and_more.py b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_rename_specimen_id_historicalspecimen_lab_specimen_id_and_more.py new file mode 100644 index 000000000..2438e03de --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_rename_specimen_id_historicalspecimen_lab_specimen_id_and_more.py @@ -0,0 +1,53 @@ +# Generated by Django 5.1 on 2024-09-08 23:21 + 
+from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('app', '0001_initial'), + ] + + operations = [ + migrations.RenameField( + model_name='historicalspecimen', + old_name='specimen_id', + new_name='lab_specimen_id', + ), + migrations.RenameField( + model_name='historicalsubject', + old_name='subject_id', + new_name='lab_subject_id', + ), + migrations.RenameField( + model_name='specimen', + old_name='specimen_id', + new_name='lab_specimen_id', + ), + migrations.RenameField( + model_name='subject', + old_name='subject_id', + new_name='lab_subject_id', + ), + migrations.AddField( + model_name='historicalspecimen', + name='external_specimen_id', + field=models.CharField(blank=True, null=True), + ), + migrations.AddField( + model_name='historicalsubject', + name='external_subject_id', + field=models.CharField(blank=True, null=True), + ), + migrations.AddField( + model_name='specimen', + name='external_specimen_id', + field=models.CharField(blank=True, null=True), + ), + migrations.AddField( + model_name='subject', + name='external_subject_id', + field=models.CharField(blank=True, null=True), + ), + ] diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/specimen.py b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/specimen.py index c45f2a47d..a117135c9 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/specimen.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/specimen.py @@ -1,9 +1,5 @@ -import logging - import ulid -from django.core.validators import RegexValidator from django.db import models -from django.db.models import QuerySet from simple_history.models import HistoricalRecords from app.models.base import BaseModel, BaseManager @@ -41,11 +37,16 @@ class Specimen(BaseModel): objects = SpecimenManager() - specimen_id = models.CharField( + lab_specimen_id = models.CharField( unique=True, blank=True, null=True ) + external_specimen_id = models.CharField( + blank=True, + null=True + ) + source = models.CharField(choices=Source.choices, blank=True, null=True) subject = models.ForeignKey(Subject, on_delete=models.SET_NULL, blank=True, null=True) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/subject.py b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/subject.py index d29627593..183fae063 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/subject.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/subject.py @@ -14,11 +14,15 @@ class Subject(BaseModel): orcabus_id_prefix = 'sbj' objects = SubjectManager() - subject_id = models.CharField( + lab_subject_id = models.CharField( unique=True, blank=True, null=True ) + external_subject_id = models.CharField( + blank=True, + null=True + ) history = HistoricalRecords() def save(self, *args, **kwargs): diff --git a/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg b/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg index eae1bcba8..03b4b421f 100644 --- a/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg +++ b/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg @@ -1,4 +1,4 @@ -LibraryPKorcabus_idlibrary_idphenotypeworkflow qualitytypeassaycoverageproject_nameproject_ownerSubjectPKorcabus_idsubject_idSpecimenPKorcabus_idspecimen_idsource \ No newline at end of file +LibraryPKorcabus_idlibrary_idphenotypeworkflow 
qualitytypeassaycoverageproject_nameproject_ownerSubjectPKorcabus_idlab_subject_idexternal_subject_idSpecimenPKorcabus_idlab_specimen_idexternal_specimen_idsource \ No newline at end of file diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py index c7bb8745c..e52f3ddd3 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py @@ -52,11 +52,11 @@ def persist_lab_metadata(df: pd.DataFrame): library_deleted.append(lib) lib.delete() - for spc in Specimen.objects.exclude(specimen_id__in=df['sample_id'].tolist()).iterator(): + for spc in Specimen.objects.exclude(lab_specimen_id__in=df['sample_id'].tolist()).iterator(): specimen_deleted.append(spc) spc.delete() - for sbj in Subject.objects.exclude(subject_id__in=df['subject_id'].tolist()).iterator(): + for sbj in Subject.objects.exclude(lab_subject_id__in=df['subject_id'].tolist()).iterator(): subject_deleted.append(sbj) sbj.delete() @@ -99,9 +99,10 @@ def persist_lab_metadata(df: pd.DataFrame): try: # 1. update or create all data in the model from the given record subject, is_sub_created = Subject.objects.update_or_create( - subject_id=record.get('subject_id'), + lab_subject_id=record.get('subject_id'), defaults={ - "subject_id": record.get('subject_id') + "lab_subject_id": record.get('subject_id'), + "external_subject_id": record.get('external_subject_id') } ) if is_sub_created: @@ -110,9 +111,10 @@ def persist_lab_metadata(df: pd.DataFrame): subject_updated.append(subject) specimen, is_spc_created = Specimen.objects.update_or_create( - specimen_id=record.get('sample_id'), + lab_specimen_id=record.get('sample_id'), defaults={ - "specimen_id": record.get('sample_id'), + "lab_specimen_id": record.get('sample_id'), + "external_specimen_id": record.get('external_sample_id'), "source": get_value_from_human_readable_label(Source.choices, record.get('source')), 'subject_id': subject.orcabus_id } diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py index fb70cb430..040193a9a 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py @@ -126,14 +126,16 @@ def test_persist_lab_metadata(self): self.assertEqual(lib_1.workflow, RECORD_1.get("Workflow"), "incorrect value (Workflow) stored") self.assertEqual(lib_1.project_owner, RECORD_1.get("ProjectOwner"), "incorrect value (ProjectOwner) stored") self.assertEqual(lib_1.project_name, RECORD_1.get("ProjectName"),"incorrect value (ProjectName) stored") - self.assertEqual(lib_1.specimen.specimen_id, RECORD_1.get("SampleID"), "incorrect specimen linked") + self.assertEqual(lib_1.specimen.lab_specimen_id, RECORD_1.get("SampleID"), "incorrect specimen linked") - spc_1 = Specimen.objects.get(specimen_id=RECORD_1.get("SampleID")) + spc_1 = Specimen.objects.get(lab_specimen_id=RECORD_1.get("SampleID")) self.assertIsNotNone(spc_1) self.assertEqual(spc_1.source, RECORD_1.get("Source"), "incorrect value stored") + self.assertEqual(spc_1.external_specimen_id, RECORD_1.get("ExternalSampleID"), "incorrect value stored") - sbj_1 = Subject.objects.get(subject_id=RECORD_1.get("SubjectID")) + sbj_1 = 
Subject.objects.get(lab_subject_id=RECORD_1.get("SubjectID")) self.assertIsNotNone(sbj_1) + self.assertEqual(sbj_1.external_subject_id, RECORD_1.get("ExternalSubjectID"), "incorrect value stored") # check relationships if lib_1 and lib_2 is in the same spc_1 spc_lib_qs = spc_1.library_set.all() @@ -145,7 +147,7 @@ def test_persist_lab_metadata(self): # check if all lib is the same with sbj_1 for rec in mock_sheet_data: lib = Library.objects.get(library_id=rec.get("LibraryID")) - self.assertEqual(lib.specimen.subject.subject_id, RECORD_1.get("SubjectID"), + self.assertEqual(lib.specimen.subject.lab_subject_id, RECORD_1.get("SubjectID"), "library is not linked to the same subject") def test_persist_lab_metadata_alter_sbj(self): @@ -164,10 +166,10 @@ def test_persist_lab_metadata_alter_sbj(self): metadata_pd = sanitize_lab_metadata_df(metadata_pd) persist_lab_metadata(metadata_pd) - sbj_4 = Subject.objects.get(subject_id=RECORD_3_DIFF_SBJ['SubjectID']) + sbj_4 = Subject.objects.get(lab_subject_id=RECORD_3_DIFF_SBJ['SubjectID']) self.assertIsNotNone(sbj_4) - spc_4 = sbj_4.specimen_set.get(specimen_id=RECORD_3_DIFF_SBJ['SampleID']) - self.assertEqual(spc_4.specimen_id, RECORD_3_DIFF_SBJ['SampleID'], + spc_4 = sbj_4.specimen_set.get(lab_specimen_id=RECORD_3_DIFF_SBJ['SampleID']) + self.assertEqual(spc_4.lab_specimen_id, RECORD_3_DIFF_SBJ['SampleID'], 'specimen obj should not change on link update') metadata_pd = pd.json_normalize([RECORD_3_DIFF_SPC]) @@ -175,7 +177,7 @@ def test_persist_lab_metadata_alter_sbj(self): persist_lab_metadata(metadata_pd) lib_3 = Library.objects.get(library_id=RECORD_3['LibraryID']) - self.assertEqual(lib_3.specimen.specimen_id, RECORD_3_DIFF_SPC['SampleID'], + self.assertEqual(lib_3.specimen.lab_specimen_id, RECORD_3_DIFF_SPC['SampleID'], 'incorrect link between lib and spc when changing links') def test_with_deleted_model(self) -> None: @@ -212,6 +214,6 @@ def test_save_choice_from_human_readable_label(self) -> None: metadata_pd = sanitize_lab_metadata_df(metadata_pd) persist_lab_metadata(metadata_pd) - spc = Specimen.objects.get(specimen_id=mock_record.get("SampleID")) + spc = Specimen.objects.get(lab_specimen_id=mock_record.get("SampleID")) self.assertIsNotNone(spc) self.assertEqual(spc.source, 'water', "incorrect value stored") From cbcd059c415f1740a24eb98bbde281c2d5557371 Mon Sep 17 00:00:00 2001 From: william Date: Mon, 9 Sep 2024 10:40:26 +1000 Subject: [PATCH 02/11] fix tests --- .../stacks/metadata-manager/README.md | 2 +- .../metadata-manager/app/tests/factories.py | 13 +++++++----- .../metadata-manager/app/tests/test_models.py | 16 +++++++-------- .../app/tests/test_viewsets.py | 20 +++++++++++-------- 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/lib/workload/stateless/stacks/metadata-manager/README.md b/lib/workload/stateless/stacks/metadata-manager/README.md index 81720da00..30e2e7af9 100644 --- a/lib/workload/stateless/stacks/metadata-manager/README.md +++ b/lib/workload/stateless/stacks/metadata-manager/README.md @@ -112,7 +112,7 @@ python3 --version Python 3.12.2 ``` -You would need to go to this microservice app directory from the root project +You would need to go to thisps microservice app directory from the root project ```bash cd lib/workload/stateless/stacks/metadata-manager diff --git a/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py b/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py index ced5e26d4..c97f0242c 100644 --- 
a/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py @@ -7,12 +7,13 @@ } SUBJECT_1 = { - "subject_id": "SBJ001", - "externalId": "EXTSUBIDA" + "lab_subject_id": "SBJ001", + "external_subject_id": "EXT_SUB_ID_A" } SPECIMEN_1 = { - "specimen_id": "PRJ001", + "lab_specimen_id": "PRJ001", + "external_specimen_id": "EXT_SPC_ID_A", "source": "FFPE" } @@ -33,14 +34,16 @@ class SubjectFactory(factory.django.DjangoModelFactory): class Meta: model = Subject - subject_id = SUBJECT_1['subject_id'] + lab_subject_id = SUBJECT_1['lab_subject_id'] + external_subject_id = SUBJECT_1['external_subject_id'] class SpecimenFactory(factory.django.DjangoModelFactory): class Meta: model = Specimen - specimen_id = SPECIMEN_1['specimen_id'] + lab_specimen_id = SPECIMEN_1['lab_specimen_id'] + external_specimen_id = SPECIMEN_1['external_specimen_id'] source = SPECIMEN_1['source'] diff --git a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py index 46204c4b0..3dc3125dd 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py @@ -13,13 +13,13 @@ class MetadataTestCase(TestCase): def setUp(self): subject = Subject.objects.create( orcabus_id=f'sbj.{ulid.new().str}', - subject_id='SBJ001', + lab_subject_id='SBJ001', ) subject.full_clean() subject.save() specimen = Specimen.objects.create( - specimen_id='SPC001', + lab_specimen_id='SPC001', subject=subject, ) specimen.full_clean() @@ -49,11 +49,11 @@ def test_get_simple_model(self): lib_one = Library.objects.get(library_id="L001") self.assertEqual(lib_one.library_id, "L001", "incorrect 'id' from given internal library id") - spc_one = Specimen.objects.get(specimen_id="SPC001") - self.assertEqual(spc_one.specimen_id, "SPC001", "incorrect 'id' from given internal specimen id") + spc_one = Specimen.objects.get(lab_specimen_id="SPC001") + self.assertEqual(spc_one.lab_specimen_id, "SPC001", "incorrect 'id' from given internal specimen id") - sub_one = Subject.objects.get(subject_id="SBJ001") - self.assertEqual(sub_one.subject_id, "SBJ001", "incorrect 'id' from given internal subject id") + sub_one = Subject.objects.get(lab_subject_id="SBJ001") + self.assertEqual(sub_one.lab_subject_id, "SBJ001", "incorrect 'id' from given internal subject id") def test_metadata_model_relationship(self): """ @@ -65,8 +65,8 @@ def test_metadata_model_relationship(self): # find the linked specimen spc_one = lib_one.specimen - self.assertEqual(spc_one.specimen_id, "SPC001", "incorrect specimen 'id' should linked to library") + self.assertEqual(spc_one.lab_specimen_id, "SPC001", "incorrect specimen 'id' should linked to library") # find the linked subject sub_one = spc_one.subject - self.assertEqual(sub_one.subject_id, "SBJ001", "incorrect subject 'id' linked to specimen") + self.assertEqual(sub_one.lab_subject_id, "SBJ001", "incorrect subject 'id' linked to specimen") diff --git a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py index f302f468b..58f35bbdd 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py @@ -27,30 +27,34 @@ def test_get_api(self): { "path": "library", "props": LIBRARY_1, + 
"lab_id_key": "library_id" }, { "path": "specimen", - "props": SPECIMEN_1 + "props": SPECIMEN_1, + "lab_id_key": "lab_specimen_id" }, { "path": "subject", - "props": SUBJECT_1 + "props": SUBJECT_1, + "lab_id_key": "lab_subject_id" } ] for model in model_to_check: path_id = model['path'] + lab_id_key = model['lab_id_key'] path = version_endpoint(path_id) logger.info(f"check API path for '{path}'") response = self.client.get(f"/{path}/") self.assertEqual(response.status_code, 200, "Ok status response is expected") - + print(lab_id_key) result_response = response.data["results"] self.assertGreater(len(result_response), 0, "A result is expected") logger.debug("Check if unique data has a single entry") - response = self.client.get(f"/{path}/?{path_id}_id={model['props'][f'{path_id}_id']}") + response = self.client.get(f"/{path}/?{lab_id_key}={model['props'][lab_id_key]}") results_response = response.data["results"] self.assertEqual( len(results_response), 1, "Single result is expected for unique data" @@ -87,8 +91,8 @@ def test_library_full_model_api(self): ) logger.debug("check if specimen and library are linked") - self.assertEqual(result_response[0]['specimen']['specimen_id'], SPECIMEN_1["specimen_id"], ) - self.assertEqual(result_response[0]['specimen']['subject']['subject_id'], SUBJECT_1["subject_id"], ) + self.assertEqual(result_response[0]['specimen']['lab_specimen_id'], SPECIMEN_1["lab_specimen_id"], ) + self.assertEqual(result_response[0]['specimen']['subject']['lab_subject_id'], SUBJECT_1["lab_subject_id"], ) def test_subject_full_model_api(self): """ @@ -105,13 +109,13 @@ def test_subject_full_model_api(self): self.assertGreater(len(result_response), 0, "A result is expected") logger.debug("Check if unique data has a single entry") - response = self.client.get(f"/{path}/?subject_id={SUBJECT_1['subject_id']}") + response = self.client.get(f"/{path}/?lab_subject_id={SUBJECT_1['lab_subject_id']}") results_response = response.data["results"] self.assertEqual( len(results_response), 1, "Single result is expected for unique data" ) logger.debug("check if specimen and library are linked") - self.assertEqual(result_response[0]['specimen_set'][0]['specimen_id'], SPECIMEN_1["specimen_id"], ) + self.assertEqual(result_response[0]['specimen_set'][0]['lab_specimen_id'], SPECIMEN_1["lab_specimen_id"], ) self.assertEqual(result_response[0]['specimen_set'][0]['library_set'][0]['library_id'], LIBRARY_1["library_id"], ) From 6599be16d998abe3316cd2a950977d63e48dd56a Mon Sep 17 00:00:00 2001 From: william Date: Mon, 9 Sep 2024 17:37:20 +1000 Subject: [PATCH 03/11] Update README.md --- .../stacks/metadata-manager/README.md | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/lib/workload/stateless/stacks/metadata-manager/README.md b/lib/workload/stateless/stacks/metadata-manager/README.md index 30e2e7af9..a5e94abe1 100644 --- a/lib/workload/stateless/stacks/metadata-manager/README.md +++ b/lib/workload/stateless/stacks/metadata-manager/README.md @@ -57,22 +57,22 @@ In the near future, we might introduce different ways to load data into the appl loading data from the Google tracking sheet and mapping it to its respective model as follows. 
-| Sheet Header | Table | Field Name | -|-------------------|------------|---------------------| -| SubjectID | `Subject` | lab_subject_id | -| ExternalSubjectID | `Subject` | external_subject_id | -| SampleID | `Specimen` | sample_id | -| ExternalSampleID | `Specimen` | external_sample_id | -| Source | `Specimen` | source | -| LibraryID | `Library` | library_id | -| Phenotype | `Library` | phenotype | -| Workflow | `Library` | workflow | -| Quality | `Library` | quality | -| Type | `Library` | type | -| Coverage (X) | `Library` | coverage | -| Assay | `Library` | assay | -| ProjectOwner | `Library` | project_owner | -| ProjectName | `Library` | project_name | +| Sheet Header | Table | Field Name | +|-------------------|------------|----------------------| +| SubjectID | `Subject` | lab_subject_id | +| ExternalSubjectID | `Subject` | external_subject_id | +| SampleID | `Specimen` | sample_id | +| ExternalSampleID | `Specimen` | external_specimen_id | +| Source | `Specimen` | source | +| LibraryID | `Library` | library_id | +| Phenotype | `Library` | phenotype | +| Workflow | `Library` | workflow | +| Quality | `Library` | quality | +| Type | `Library` | type | +| Coverage (X) | `Library` | coverage | +| Assay | `Library` | assay | +| ProjectOwner | `Library` | project_owner | +| ProjectName | `Library` | project_name | Some important notes of the sync: From dfd93508a2a94a32438bfbc2722c33d39e515717 Mon Sep 17 00:00:00 2001 From: william Date: Fri, 13 Sep 2024 18:27:45 +1000 Subject: [PATCH 04/11] update model --- .../stacks/metadata-manager/README.md | 2 +- .../app/migrations/0001_initial.py | 128 ++++++++++----- ...oricalspecimen_lab_specimen_id_and_more.py | 53 ------- .../metadata-manager/app/models/__init__.py | 11 +- .../metadata-manager/app/models/contact.py | 27 ++++ .../app/models/{lab => }/individual.py | 11 +- .../app/models/{lab => }/library.py | 23 ++- .../app/models/pipeline/__init__.py | 0 .../app/models/pipeline/library_run.py | 43 ----- .../metadata-manager/app/models/project.py | 30 ++++ .../app/models/{lab/specimen.py => sample.py} | 21 +-- .../app/models/{lab => }/subject.py | 7 +- .../app/models/{lab => }/utils.py | 0 .../metadata-manager/app/serializers.py | 65 -------- .../{models/lab => serializers}/__init__.py | 0 .../app/serializers/contact.py | 9 ++ .../app/serializers/library.py | 9 ++ .../app/serializers/project.py | 9 ++ .../app/serializers/sample.py | 9 ++ .../app/serializers/subject.py | 9 ++ .../metadata-manager/app/settings/aws.py | 15 +- .../metadata-manager/app/settings/it.py | 2 - .../stacks/metadata-manager/app/urls/base.py | 7 +- .../metadata-manager/app/viewsets/__init__.py | 5 + .../metadata-manager/app/viewsets/contact.py | 28 ++++ .../metadata-manager/app/viewsets/lab.py | 147 ------------------ .../metadata-manager/app/viewsets/library.py | 44 ++++++ .../metadata-manager/app/viewsets/project.py | 28 ++++ .../metadata-manager/app/viewsets/sample.py | 28 ++++ .../metadata-manager/app/viewsets/subject.py | 30 ++++ .../metadata-manager/docs/schema.drawio.svg | 2 +- .../proc/service/tracking_sheet_srv.py | 118 ++++++-------- 32 files changed, 442 insertions(+), 478 deletions(-) delete mode 100644 lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_rename_specimen_id_historicalspecimen_lab_specimen_id_and_more.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/models/contact.py rename lib/workload/stateless/stacks/metadata-manager/app/models/{lab => }/individual.py (78%) rename 
lib/workload/stateless/stacks/metadata-manager/app/models/{lab => }/library.py (83%) delete mode 100644 lib/workload/stateless/stacks/metadata-manager/app/models/pipeline/__init__.py delete mode 100644 lib/workload/stateless/stacks/metadata-manager/app/models/pipeline/library_run.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/models/project.py rename lib/workload/stateless/stacks/metadata-manager/app/models/{lab/specimen.py => sample.py} (77%) rename lib/workload/stateless/stacks/metadata-manager/app/models/{lab => }/subject.py (86%) rename lib/workload/stateless/stacks/metadata-manager/app/models/{lab => }/utils.py (100%) delete mode 100644 lib/workload/stateless/stacks/metadata-manager/app/serializers.py rename lib/workload/stateless/stacks/metadata-manager/app/{models/lab => serializers}/__init__.py (100%) create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/serializers/contact.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/serializers/library.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/serializers/project.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/serializers/sample.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/serializers/subject.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/viewsets/contact.py delete mode 100644 lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/viewsets/library.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/viewsets/project.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/viewsets/sample.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/viewsets/subject.py diff --git a/lib/workload/stateless/stacks/metadata-manager/README.md b/lib/workload/stateless/stacks/metadata-manager/README.md index 8ab9bb45a..3f639a572 100644 --- a/lib/workload/stateless/stacks/metadata-manager/README.md +++ b/lib/workload/stateless/stacks/metadata-manager/README.md @@ -90,7 +90,7 @@ Some important notes of the sync: the latter record will be retained. 5. The sync happens every night periodically. See `./deploy/README.md` for more info. -Please refer to the [traking-sheet-service](proc/service/tracking_sheet_srv.py) implementation. +Please refer to the [tracking-sheet-service](proc/service/tracking_sheet_srv.py) implementation. 
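For reference, the per-record upsert that the nightly sync performs can be sketched as below. This is a simplified illustration only — the `upsert_sheet_record` helper and its `record` keys are hypothetical, using the model and field names introduced in this patch — and the actual implementation in `proc/service/tracking_sheet_srv.py` additionally handles choice-label mapping, project/contact linking, deletion of removed records and history tracking.

```python
from app.models import Library, Sample, Subject


def upsert_sheet_record(record: dict) -> Library:
    """Upsert one sanitized tracking-sheet row into the metadata models."""
    subject, _ = Subject.objects.update_or_create(
        subject_id=record.get("subject_id"),
        defaults={
            "subject_id": record.get("subject_id"),
            "external_subject_id": record.get("external_subject_id"),
        },
    )
    sample, _ = Sample.objects.update_or_create(
        sample_id=record.get("sample_id"),
        defaults={
            "sample_id": record.get("sample_id"),
            "external_sample_id": record.get("external_sample_id"),
            # the real service maps the human-readable label to a Source choice value
            "source": record.get("source"),
        },
    )
    library, _ = Library.objects.update_or_create(
        library_id=record.get("library_id"),
        defaults={
            "library_id": record.get("library_id"),
            "sample": sample,
            "subject": subject,
        },
    )
    return library
```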
### Audit Data diff --git a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py index 6337867eb..719beb565 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1 on 2024-08-20 07:08 +# Generated by Django 5.1 on 2024-09-13 08:19 import django.core.validators import django.db.models.deletion @@ -17,10 +17,21 @@ class Migration(migrations.Migration): operations = [ migrations.CreateModel( - name='Specimen', + name='Contact', fields=[ ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), - ('specimen_id', models.CharField(blank=True, null=True, unique=True)), + ('contact_id', models.CharField(blank=True, null=True, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='Sample', + fields=[ + ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('sample_id', models.CharField(blank=True, null=True, unique=True)), + ('external_sample_id', models.CharField(blank=True, null=True)), ('source', models.CharField(blank=True, choices=[('ascites', 'Ascites'), ('blood', 'Blood'), ('bone-marrow', 'BoneMarrow'), ('buccal', 'Buccal'), ('cell-line', 'Cell_line'), ('cfDNA', 'Cfdna'), ('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True)), ], options={ @@ -32,16 +43,78 @@ class Migration(migrations.Migration): fields=[ ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), ('subject_id', models.CharField(blank=True, null=True, unique=True)), + ('external_subject_id', models.CharField(blank=True, null=True)), ], options={ 'abstract': False, }, ), + migrations.CreateModel( + name='HistoricalContact', + fields=[ + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('contact_id', models.CharField(blank=True, db_index=True, null=True)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', 
models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'verbose_name': 'historical contact', + 'verbose_name_plural': 'historical contacts', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='HistoricalProject', + fields=[ + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('project_id', models.CharField(blank=True, db_index=True, null=True)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('contact', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.contact')), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'verbose_name': 'historical project', + 'verbose_name_plural': 'historical projects', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='HistoricalSample', + fields=[ + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('sample_id', models.CharField(blank=True, db_index=True, null=True)), + ('external_sample_id', models.CharField(blank=True, null=True)), + ('source', models.CharField(blank=True, choices=[('ascites', 'Ascites'), ('blood', 'Blood'), ('bone-marrow', 'BoneMarrow'), ('buccal', 'Buccal'), ('cell-line', 'Cell_line'), ('cfDNA', 'Cfdna'), ('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'verbose_name': 'historical sample', + 'verbose_name_plural': 'historical samples', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), migrations.CreateModel( name='HistoricalSubject', fields=[ ('orcabus_id', models.CharField(db_index=True, 
editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), ('subject_id', models.CharField(blank=True, db_index=True, null=True)), + ('external_subject_id', models.CharField(blank=True, null=True)), ('history_id', models.AutoField(primary_key=True, serialize=False)), ('history_date', models.DateTimeField(db_index=True)), ('history_change_reason', models.CharField(max_length=100, null=True)), @@ -56,6 +129,17 @@ class Migration(migrations.Migration): }, bases=(simple_history.models.HistoricalChanges, models.Model), ), + migrations.CreateModel( + name='Project', + fields=[ + ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('project_id', models.CharField(blank=True, null=True, unique=True)), + ('contact', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.contact')), + ], + options={ + 'abstract': False, + }, + ), migrations.CreateModel( name='Library', fields=[ @@ -67,9 +151,9 @@ class Migration(migrations.Migration): ('type', models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('TSO-DNA', 'TSO_DNA'), ('TSO-RNA', 'TSO_RNA'), ('WGS', 'Wgs'), ('WTS', 'Wts'), ('other', 'Other')], null=True)), ('assay', models.CharField(blank=True, null=True)), ('coverage', models.FloatField(blank=True, null=True)), - ('project_owner', models.CharField(blank=True, null=True)), - ('project_name', models.CharField(blank=True, null=True)), - ('specimen', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.specimen')), + ('project', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.project')), + ('sample', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.sample')), + ('subject', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.subject')), ], options={ 'abstract': False, @@ -86,14 +170,14 @@ class Migration(migrations.Migration): ('type', models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('TSO-DNA', 'TSO_DNA'), ('TSO-RNA', 'TSO_RNA'), ('WGS', 'Wgs'), ('WTS', 'Wts'), ('other', 'Other')], null=True)), ('assay', models.CharField(blank=True, null=True)), ('coverage', models.FloatField(blank=True, null=True)), - ('project_owner', models.CharField(blank=True, null=True)), - ('project_name', models.CharField(blank=True, null=True)), ('history_id', models.AutoField(primary_key=True, serialize=False)), ('history_date', models.DateTimeField(db_index=True)), ('history_change_reason', models.CharField(max_length=100, null=True)), ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', 
to=settings.AUTH_USER_MODEL)), - ('specimen', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.specimen')), + ('project', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.project')), + ('sample', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.sample')), + ('subject', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.subject')), ], options={ 'verbose_name': 'historical library', @@ -103,30 +187,4 @@ class Migration(migrations.Migration): }, bases=(simple_history.models.HistoricalChanges, models.Model), ), - migrations.AddField( - model_name='specimen', - name='subject', - field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.subject'), - ), - migrations.CreateModel( - name='HistoricalSpecimen', - fields=[ - ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), - ('specimen_id', models.CharField(blank=True, db_index=True, null=True)), - ('source', models.CharField(blank=True, choices=[('ascites', 'Ascites'), ('blood', 'Blood'), ('bone-marrow', 'BoneMarrow'), ('buccal', 'Buccal'), ('cell-line', 'Cell_line'), ('cfDNA', 'Cfdna'), ('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True)), - ('history_id', models.AutoField(primary_key=True, serialize=False)), - ('history_date', models.DateTimeField(db_index=True)), - ('history_change_reason', models.CharField(max_length=100, null=True)), - ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), - ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), - ('subject', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.subject')), - ], - options={ - 'verbose_name': 'historical specimen', - 'verbose_name_plural': 'historical specimens', - 'ordering': ('-history_date', '-history_id'), - 'get_latest_by': ('history_date', 'history_id'), - }, - bases=(simple_history.models.HistoricalChanges, models.Model), - ), ] diff --git a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_rename_specimen_id_historicalspecimen_lab_specimen_id_and_more.py b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_rename_specimen_id_historicalspecimen_lab_specimen_id_and_more.py deleted file mode 100644 index 2438e03de..000000000 --- a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_rename_specimen_id_historicalspecimen_lab_specimen_id_and_more.py +++ /dev/null @@ -1,53 +0,0 @@ -# Generated by Django 5.1 on 2024-09-08 23:21 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('app', 
'0001_initial'), - ] - - operations = [ - migrations.RenameField( - model_name='historicalspecimen', - old_name='specimen_id', - new_name='lab_specimen_id', - ), - migrations.RenameField( - model_name='historicalsubject', - old_name='subject_id', - new_name='lab_subject_id', - ), - migrations.RenameField( - model_name='specimen', - old_name='specimen_id', - new_name='lab_specimen_id', - ), - migrations.RenameField( - model_name='subject', - old_name='subject_id', - new_name='lab_subject_id', - ), - migrations.AddField( - model_name='historicalspecimen', - name='external_specimen_id', - field=models.CharField(blank=True, null=True), - ), - migrations.AddField( - model_name='historicalsubject', - name='external_subject_id', - field=models.CharField(blank=True, null=True), - ), - migrations.AddField( - model_name='specimen', - name='external_specimen_id', - field=models.CharField(blank=True, null=True), - ), - migrations.AddField( - model_name='subject', - name='external_subject_id', - field=models.CharField(blank=True, null=True), - ), - ] diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/__init__.py b/lib/workload/stateless/stacks/metadata-manager/app/models/__init__.py index f6ea56069..7af8375e8 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/__init__.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/__init__.py @@ -1,8 +1,7 @@ # https://docs.djangoproject.com/en/5.0/topics/db/models/#organizing-models-in-a-package -from .lab.library import Library -from .lab.specimen import Specimen -from .lab.subject import Subject - -# Disabled -# from .pipeline.library_run import LibraryRun +from .library import Library +from .sample import Sample +from .subject import Subject +from .contact import Contact +from .project import Project diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/contact.py b/lib/workload/stateless/stacks/metadata-manager/app/models/contact.py new file mode 100644 index 000000000..fa6cac6a5 --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/contact.py @@ -0,0 +1,27 @@ +import ulid +from django.db import models +from simple_history.models import HistoricalRecords + +from app.models.base import BaseModel, BaseManager + + +class ContactManager(BaseManager): + pass + + +class Contact(BaseModel): + orcabus_id_prefix = 'cnt' + objects = ContactManager() + + contact_id = models.CharField( + unique=True, + blank=True, + null=True + ) + + history = HistoricalRecords() + + def save(self, *args, **kwargs): + if not self.orcabus_id: + self.orcabus_id = self.orcabus_id_prefix + '.' 
+ ulid.new().str + super().save(*args, **kwargs) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/individual.py b/lib/workload/stateless/stacks/metadata-manager/app/models/individual.py similarity index 78% rename from lib/workload/stateless/stacks/metadata-manager/app/models/lab/individual.py rename to lib/workload/stateless/stacks/metadata-manager/app/models/individual.py index 7638f60ce..53192adfa 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/individual.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/individual.py @@ -1,30 +1,25 @@ -import logging - import ulid -from django.core.validators import RegexValidator from django.db import models -from django.db.models import QuerySet from simple_history.models import HistoricalRecords from app.models.base import BaseModel, BaseManager class IndividualManager(BaseManager): - None + pass class Individual(BaseModel): orcabus_id_prefix = 'idv' objects = IndividualManager() + history = HistoricalRecords() - internal_id = models.CharField( + individual_id = models.CharField( unique=True, blank=True, null=True ) - history = HistoricalRecords() - def save(self, *args, **kwargs): if not self.orcabus_id: self.orcabus_id = self.orcabus_id_prefix + '.' + ulid.new().str diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/library.py b/lib/workload/stateless/stacks/metadata-manager/app/models/library.py similarity index 83% rename from lib/workload/stateless/stacks/metadata-manager/app/models/lab/library.py rename to lib/workload/stateless/stacks/metadata-manager/app/models/library.py index 8caa24861..332346e81 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/library.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/library.py @@ -2,11 +2,12 @@ import ulid from django.db import models -from django.core.validators import RegexValidator from simple_history.models import HistoricalRecords from app.models.base import BaseManager, BaseModel -from app.models.lab.specimen import Specimen +from app.models.subject import Subject +from app.models.sample import Sample +from app.models.project import Project logger = logging.getLogger(__name__) @@ -51,13 +52,13 @@ class LibraryType(models.TextChoices): class LibraryManager(BaseManager): - None + pass class Library(BaseModel): orcabus_id_prefix = 'lib' - objects = LibraryManager() + history = HistoricalRecords() library_id = models.CharField( unique=True, @@ -92,17 +93,11 @@ class Library(BaseModel): blank=True, null=True ) - project_owner = models.CharField( - blank=True, - null=True - ) - project_name = models.CharField( - blank=True, - null=True - ) - specimen = models.ForeignKey(Specimen, on_delete=models.SET_NULL, blank=True, null=True) - history = HistoricalRecords() + # Relationships + sample = models.ForeignKey(Sample, on_delete=models.SET_NULL, blank=True, null=True) + subject = models.ForeignKey(Subject, on_delete=models.SET_NULL, blank=True, null=True) + project = models.ForeignKey(Project, on_delete=models.SET_NULL, blank=True, null=True) def save(self, *args, **kwargs): if not self.orcabus_id: diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/pipeline/__init__.py b/lib/workload/stateless/stacks/metadata-manager/app/models/pipeline/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/pipeline/library_run.py 
b/lib/workload/stateless/stacks/metadata-manager/app/models/pipeline/library_run.py deleted file mode 100644 index de72dc0b9..000000000 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/pipeline/library_run.py +++ /dev/null @@ -1,43 +0,0 @@ -import logging - -from django.db import models -from django.db.models import QuerySet - -from app.models import Library -from app.models.base import BaseModel, BaseManager - - -class LibraryRunManager(BaseManager): - None - - -# Disabled -# Uncomment from ../__init__.py to enable - -class LibraryRun(BaseModel): - objects = LibraryRunManager() - - # Possible to have its own model of sequence_run - sequence_run_id = models.CharField( - blank=True, - null=True - ) - - lane = models.PositiveSmallIntegerField( - blank=True, - null=True - ) - override_cycles = models.CharField( - blank=True, - null=True - ) - coverage_yield = models.CharField( - blank=True, - null=True - ) - qc_status = models.CharField( - blank=True, - null=True - ) - - library = models.ForeignKey(Library, on_delete=models.SET_NULL, null=True, blank=False) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/project.py b/lib/workload/stateless/stacks/metadata-manager/app/models/project.py new file mode 100644 index 000000000..3ad11fc05 --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/project.py @@ -0,0 +1,30 @@ +import ulid +from django.db import models +from simple_history.models import HistoricalRecords + +from app.models.contact import Contact +from app.models.base import BaseModel, BaseManager + + +class ProjectManager(BaseManager): + pass + + +class Project(BaseModel): + orcabus_id_prefix = 'prj' + objects = ProjectManager() + history = HistoricalRecords() + + project_id = models.CharField( + unique=True, + blank=True, + null=True + ) + + # Relationships + contact = models.ForeignKey(Contact, on_delete=models.SET_NULL, blank=True, null=True) + + def save(self, *args, **kwargs): + if not self.orcabus_id: + self.orcabus_id = self.orcabus_id_prefix + '.' 
+ ulid.new().str + super().save(*args, **kwargs) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/specimen.py b/lib/workload/stateless/stacks/metadata-manager/app/models/sample.py similarity index 77% rename from lib/workload/stateless/stacks/metadata-manager/app/models/lab/specimen.py rename to lib/workload/stateless/stacks/metadata-manager/app/models/sample.py index a117135c9..8b79c07d7 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/specimen.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/sample.py @@ -3,7 +3,6 @@ from simple_history.models import HistoricalRecords from app.models.base import BaseModel, BaseManager -from app.models.lab.subject import Subject class Source(models.TextChoices): @@ -28,29 +27,25 @@ class Source(models.TextChoices): WATER = "water", "Water" -class SpecimenManager(BaseManager): - None +class SampleManager(BaseManager): + pass -class Specimen(BaseModel): - orcabus_id_prefix = 'spc' - - objects = SpecimenManager() +class Sample(BaseModel): + orcabus_id_prefix = 'smp' + objects = SampleManager() + history = HistoricalRecords() - lab_specimen_id = models.CharField( + sample_id = models.CharField( unique=True, blank=True, null=True ) - external_specimen_id = models.CharField( + external_sample_id = models.CharField( blank=True, null=True ) - source = models.CharField(choices=Source.choices, blank=True, null=True) - subject = models.ForeignKey(Subject, on_delete=models.SET_NULL, blank=True, null=True) - - history = HistoricalRecords() def save(self, *args, **kwargs): if not self.orcabus_id: diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/subject.py b/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py similarity index 86% rename from lib/workload/stateless/stacks/metadata-manager/app/models/lab/subject.py rename to lib/workload/stateless/stacks/metadata-manager/app/models/subject.py index 183fae063..2e38882dd 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/subject.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py @@ -1,20 +1,20 @@ import ulid from django.db import models -from django.core.validators import RegexValidator from simple_history.models import HistoricalRecords from app.models.base import BaseModel, BaseManager class SubjectManager(BaseManager): - None + pass class Subject(BaseModel): orcabus_id_prefix = 'sbj' objects = SubjectManager() + history = HistoricalRecords() - lab_subject_id = models.CharField( + subject_id = models.CharField( unique=True, blank=True, null=True @@ -23,7 +23,6 @@ class Subject(BaseModel): blank=True, null=True ) - history = HistoricalRecords() def save(self, *args, **kwargs): if not self.orcabus_id: diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/utils.py b/lib/workload/stateless/stacks/metadata-manager/app/models/utils.py similarity index 100% rename from lib/workload/stateless/stacks/metadata-manager/app/models/lab/utils.py rename to lib/workload/stateless/stacks/metadata-manager/app/models/utils.py diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers.py deleted file mode 100644 index 6bbcabb91..000000000 --- a/lib/workload/stateless/stacks/metadata-manager/app/serializers.py +++ /dev/null @@ -1,65 +0,0 @@ -from rest_framework import serializers - -from app.models import Subject, Specimen, Library - - -class 
LibrarySerializer(serializers.ModelSerializer): - class Meta: - model = Library - fields = "__all__" - - -class SpecimenSerializer(serializers.ModelSerializer): - class Meta: - model = Specimen - fields = "__all__" - - -class SubjectSerializer(serializers.ModelSerializer): - class Meta: - model = Subject - fields = "__all__" - - -class SubjectFullSerializer(serializers.ModelSerializer): - """ - This is a full Subject serializer which include all the children's (specimen and library) related models - """ - - class SpecimenLibrarySerializer(serializers.ModelSerializer): - """ - This is a full Specimen serializer which include the library model - """ - library_set = LibrarySerializer(many=True) - - class Meta: - model = Specimen - fields = "__all__" - - specimen_set = SpecimenLibrarySerializer(many=True) - - class Meta: - model = Subject - fields = "__all__" - - -class LibraryFullSerializer(serializers.ModelSerializer): - """ - This is a full Library serializer which include the specimen and subject models - """ - - class SpecimenSubjectSerializer(serializers.ModelSerializer): - """ - This is a full Specimen serializer which include the subject model - """ - subject = SubjectSerializer() - - class Meta: - model = Specimen - fields = "__all__" - - specimen = SpecimenSubjectSerializer(many=False) - - class Meta: - model = Library - fields = "__all__" diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/__init__.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/__init__.py similarity index 100% rename from lib/workload/stateless/stacks/metadata-manager/app/models/lab/__init__.py rename to lib/workload/stateless/stacks/metadata-manager/app/serializers/__init__.py diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/contact.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/contact.py new file mode 100644 index 000000000..f94da911e --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/contact.py @@ -0,0 +1,9 @@ +from rest_framework import serializers + +from app.models import Contact + + +class ContactSerializer(serializers.ModelSerializer): + class Meta: + model = Contact + fields = "__all__" diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/library.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/library.py new file mode 100644 index 000000000..a3248083f --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/library.py @@ -0,0 +1,9 @@ +from rest_framework import serializers + +from app.models import Library + + +class LibrarySerializer(serializers.ModelSerializer): + class Meta: + model = Library + fields = "__all__" diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/project.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/project.py new file mode 100644 index 000000000..c991be8c7 --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/project.py @@ -0,0 +1,9 @@ +from rest_framework import serializers + +from app.models import Project + + +class ProjectSerializer(serializers.ModelSerializer): + class Meta: + model = Project + fields = "__all__" diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/sample.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/sample.py new file mode 100644 index 000000000..b889ba461 --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/sample.py @@ 
-0,0 +1,9 @@ +from rest_framework import serializers + +from app.models import Sample + + +class SampleSerializer(serializers.ModelSerializer): + class Meta: + model = Sample + fields = "__all__" diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/subject.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/subject.py new file mode 100644 index 000000000..e9a437dcc --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/subject.py @@ -0,0 +1,9 @@ +from rest_framework import serializers + +from app.models import Subject + + +class SubjectSerializer(serializers.ModelSerializer): + class Meta: + model = Subject + fields = "__all__" diff --git a/lib/workload/stateless/stacks/metadata-manager/app/settings/aws.py b/lib/workload/stateless/stacks/metadata-manager/app/settings/aws.py index 9968ffbd3..2ab713d40 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/settings/aws.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/settings/aws.py @@ -12,7 +12,7 @@ from environ import Env from libumccr.aws import libsm -from .base import * # noqa +from .base import * logger = logging.getLogger(__name__) @@ -35,16 +35,11 @@ CORS_ORIGIN_ALLOW_ALL = False CORS_ALLOW_CREDENTIALS = False -# FIXME: https://github.com/umccr/infrastructure/issues/272 CORS_ALLOWED_ORIGINS = [ - "https://portal.umccr.org", - "https://portal.prod.umccr.org", - "https://portal.stg.umccr.org", - "https://portal.dev.umccr.org", - "https://data.umccr.org", - "https://data.prod.umccr.org", - "https://data.dev.umccr.org", - "https://data.stg.umccr.org", + "https://orcaui.dev.umccr.org", + "https://orcaui.stg.umccr.org", + "https://orcaui.prod.umccr.org", + "https://orcaui.umccr.org", ] CSRF_TRUSTED_ORIGINS = copy.deepcopy(CORS_ALLOWED_ORIGINS) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/settings/it.py b/lib/workload/stateless/stacks/metadata-manager/app/settings/it.py index 4d5758de6..cde77e1a0 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/settings/it.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/settings/it.py @@ -18,5 +18,3 @@ 'PORT': os.getenv('DB_PORT', 5432), } } - -DATABASES = {"default": db_conn_cfg} diff --git a/lib/workload/stateless/stacks/metadata-manager/app/urls/base.py b/lib/workload/stateless/stacks/metadata-manager/app/urls/base.py index ba87470f4..7318308c8 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/urls/base.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/urls/base.py @@ -1,7 +1,7 @@ from django.urls import path, include from app.routers import OptionalSlashDefaultRouter -from app.viewsets.lab import LibraryViewSet, SubjectViewSet, SpecimenViewSet +from app.viewsets import LibraryViewSet, SubjectViewSet, SampleViewSet, ProjectViewSet, ContactViewSet from app.settings.base import API_VERSION api_namespace = "api" @@ -10,11 +10,12 @@ router = OptionalSlashDefaultRouter() router.register(r"subject", SubjectViewSet, basename="subject") -router.register(r"specimen", SpecimenViewSet, basename="specimen") +router.register(r"sample", SampleViewSet, basename="sample") router.register(r"library", LibraryViewSet, basename="library") +router.register(r"project", ProjectViewSet, basename="project") +router.register(r"contact", ContactViewSet, basename="contact") urlpatterns = [ - # path("iam/", include(router.urls)), path(f"{api_base}", include(router.urls)), ] diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/__init__.py 
b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/__init__.py index e69de29bb..a6a2772b6 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/__init__.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/__init__.py @@ -0,0 +1,5 @@ +from .sample import SampleViewSet +from .library import LibraryViewSet +from .subject import SubjectViewSet +from .project import ProjectViewSet +from .contact import ContactViewSet diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/contact.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/contact.py new file mode 100644 index 000000000..ee30c345f --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/contact.py @@ -0,0 +1,28 @@ +from drf_spectacular.utils import extend_schema +from rest_framework import filters + +from app.models import Contact +from app.serializers.contact import ContactSerializer +from app.pagination import StandardResultsSetPagination + +from rest_framework.viewsets import ReadOnlyModelViewSet + + +class ContactViewSet(ReadOnlyModelViewSet): + lookup_value_regex = "[^/]+" + serializer_class = ContactSerializer + pagination_class = StandardResultsSetPagination + filter_backends = [filters.OrderingFilter, filters.SearchFilter] + ordering_fields = "__all__" + ordering = ["-orcabus_id"] + search_fields = Contact.get_base_fields() + queryset = Contact.objects.none() + + @extend_schema(parameters=[ + ContactSerializer + ]) + def list(self, request, *args, **kwargs): + return super().list(request, *args, **kwargs) + + def get_queryset(self): + return Contact.objects.get_by_keyword(**self.request.query_params) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py deleted file mode 100644 index 082e796e6..000000000 --- a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py +++ /dev/null @@ -1,147 +0,0 @@ -from drf_spectacular.utils import extend_schema, OpenApiParameter -from rest_framework import filters -from rest_framework.viewsets import ReadOnlyModelViewSet -from rest_framework.decorators import action -from rest_framework.response import Response - -from app.models import Subject, Specimen, Library -from app.pagination import StandardResultsSetPagination -from app.serializers import SubjectSerializer, SpecimenSerializer, LibrarySerializer, SubjectFullSerializer, \ - LibraryFullSerializer - - -class SubjectViewSet(ReadOnlyModelViewSet): - lookup_value_regex = "[^/]+" - serializer_class = SubjectSerializer - pagination_class = StandardResultsSetPagination - filter_backends = [filters.OrderingFilter, filters.SearchFilter] - ordering_fields = "__all__" - ordering = ["-orcabus_id"] - search_fields = Subject.get_base_fields() - queryset = Subject.objects.none() - - @extend_schema(parameters=[ - SubjectSerializer - ]) - def list(self, request, *args, **kwargs): - return super().list(request, *args, **kwargs) - - def get_queryset(self): - return Subject.objects.get_by_keyword(**self.request.query_params) - - @extend_schema(operation_id='/api/v1/subject/full/', - responses={200: SubjectFullSerializer(many=True)}, - parameters=[ - SubjectSerializer, - OpenApiParameter(name='library_id', - description='Filter the subjects that has the given library_id in ' - 'the Library model.', - required=False, - type=str), - ], - ) - @action(detail=False, methods=['get'], url_path='full') - def get_full_model_set(self, request): - query_params = 
self.request.query_params.copy() - - ordering = query_params.get("ordering", '-orcabus_id') - qs = Subject.objects.prefetch_related("specimen_set__library_set").all().order_by(ordering) - - # Allow filtering by library_id - library_id = query_params.get("library_id", None) - if library_id: - query_params.pop("library_id") - qs = qs.filter(specimen__library__library_id=library_id) - - # Following same pattern with other filter where if unknown query params returns empty qs - qs = Subject.objects.get_model_fields_query(qs, **query_params) - - page = self.paginate_queryset(qs) - serializer = SubjectFullSerializer(page, many=True) - - return self.get_paginated_response(serializer.data) - - @extend_schema(operation_id='/api/v1/subject/id/full/', responses={200: SubjectFullSerializer(many=True)}) - @action(detail=True, methods=['get'], url_path='full') - def get_full_model_detail(self, request, pk=None): - subject = Subject.objects.get(orcabus_id=pk) - serializer = SubjectFullSerializer(subject) - - return Response(serializer.data) - - -class SpecimenViewSet(ReadOnlyModelViewSet): - lookup_value_regex = "[^/]+" - serializer_class = SpecimenSerializer - pagination_class = StandardResultsSetPagination - filter_backends = [filters.OrderingFilter, filters.SearchFilter] - ordering_fields = "__all__" - ordering = ["-orcabus_id"] - search_fields = Specimen.get_base_fields() - queryset = Specimen.objects.none() - - @extend_schema(parameters=[ - SpecimenSerializer - ]) - def list(self, request, *args, **kwargs): - return super().list(request, *args, **kwargs) - - def get_queryset(self): - return Specimen.objects.get_by_keyword(**self.request.query_params) - - -class LibraryViewSet(ReadOnlyModelViewSet): - lookup_value_regex = "[^/]+" - serializer_class = LibrarySerializer - pagination_class = StandardResultsSetPagination - filter_backends = [filters.OrderingFilter, filters.SearchFilter] - ordering_fields = "__all__" - ordering = ["-orcabus_id"] - search_fields = Library.get_base_fields() - queryset = Library.objects.none() - - @extend_schema(parameters=[ - LibrarySerializer - ]) - def list(self, request, *args, **kwargs): - return super().list(request, *args, **kwargs) - - def get_queryset(self): - return Library.objects.get_by_keyword(**self.request.query_params) - - @extend_schema( - parameters=[LibrarySerializer], - responses={200: LibraryFullSerializer(many=True)} - ) - @action(detail=False, methods=['get'], url_path='full') - def get_full_model_set(self, request): - query_params = self.request.query_params.copy() - - ordering = query_params.get("ordering", '-orcabus_id') - qs = Library.objects.select_related("specimen__subject").all().order_by(ordering) - - coverage__lte = query_params.get("coverage__lte", None) - if coverage__lte: - query_params.pop("coverage__lte") - qs = qs.filter(coverage__lte=coverage__lte) - - coverage__gte = query_params.get("coverage__gte", None) - if coverage__gte: - query_params.pop("coverage__gte") - qs = qs.filter(coverage__gte=coverage__gte) - - # Allow filtering by the keys inside the library model - qs = Library.objects.get_model_fields_query(qs, **query_params) - - page = self.paginate_queryset(qs) - serializer = LibraryFullSerializer(page, many=True) - - return self.get_paginated_response(serializer.data) - - @extend_schema(responses={200: LibraryFullSerializer(many=False)}) - @action(detail=True, methods=['get'], url_path='full') - def get_full_model_detail(self, request, pk=None): - lib = Library.objects.get(orcabus_id=pk) - serializer = LibraryFullSerializer(lib) 
- - return Response(serializer.data) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/library.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/library.py new file mode 100644 index 000000000..9749956dd --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/library.py @@ -0,0 +1,44 @@ +from drf_spectacular.utils import extend_schema +from rest_framework import filters + +from app.models import Library +from app.serializers.library import LibrarySerializer +from app.pagination import StandardResultsSetPagination + +from rest_framework.viewsets import ReadOnlyModelViewSet + + +class LibraryViewSet(ReadOnlyModelViewSet): + lookup_value_regex = "[^/]+" + serializer_class = LibrarySerializer + pagination_class = StandardResultsSetPagination + filter_backends = [filters.OrderingFilter, filters.SearchFilter] + ordering_fields = "__all__" + ordering = ["-orcabus_id"] + search_fields = Library.get_base_fields() + queryset = Library.objects.none() + + def get_queryset(self): + qs = Library.objects.all() + query_params = self.request.query_params.copy() + + coverage__lte = query_params.get("coverage__lte", None) + if coverage__lte: + query_params.pop("coverage__lte") + qs = qs.filter(coverage__lte=coverage__lte) + + coverage__gte = query_params.get("coverage__gte", None) + if coverage__gte: + query_params.pop("coverage__gte") + qs = qs.filter(coverage__gte=coverage__gte) + + # Continue filtering by the keys inside the library model + return Library.objects.get_model_fields_query(qs, **query_params) + + @extend_schema(parameters=[ + LibrarySerializer + ]) + def list(self, request, *args, **kwargs): + return super().list(request, *args, **kwargs) + + diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/project.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/project.py new file mode 100644 index 000000000..79091ec49 --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/project.py @@ -0,0 +1,28 @@ +from drf_spectacular.utils import extend_schema +from rest_framework import filters + +from app.models import Project +from app.serializers.project import ProjectSerializer +from app.pagination import StandardResultsSetPagination + +from rest_framework.viewsets import ReadOnlyModelViewSet + + +class ProjectViewSet(ReadOnlyModelViewSet): + lookup_value_regex = "[^/]+" + serializer_class = ProjectSerializer + pagination_class = StandardResultsSetPagination + filter_backends = [filters.OrderingFilter, filters.SearchFilter] + ordering_fields = "__all__" + ordering = ["-orcabus_id"] + search_fields = Project.get_base_fields() + queryset = Project.objects.none() + + @extend_schema(parameters=[ + ProjectSerializer + ]) + def list(self, request, *args, **kwargs): + return super().list(request, *args, **kwargs) + + def get_queryset(self): + return Project.objects.get_by_keyword(**self.request.query_params) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/sample.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/sample.py new file mode 100644 index 000000000..0a8a9740e --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/sample.py @@ -0,0 +1,28 @@ +from drf_spectacular.utils import extend_schema +from rest_framework import filters + +from app.models import Sample +from app.serializers.sample import SampleSerializer +from app.pagination import StandardResultsSetPagination + +from rest_framework.viewsets import ReadOnlyModelViewSet + 
+ +class SampleViewSet(ReadOnlyModelViewSet): + lookup_value_regex = "[^/]+" + serializer_class = SampleSerializer + pagination_class = StandardResultsSetPagination + filter_backends = [filters.OrderingFilter, filters.SearchFilter] + ordering_fields = "__all__" + ordering = ["-orcabus_id"] + search_fields = Sample.get_base_fields() + queryset = Sample.objects.none() + + @extend_schema(parameters=[ + SampleSerializer + ]) + def list(self, request, *args, **kwargs): + return super().list(request, *args, **kwargs) + + def get_queryset(self): + return Sample.objects.get_by_keyword(**self.request.query_params) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/subject.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/subject.py new file mode 100644 index 000000000..ea9a1b097 --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/subject.py @@ -0,0 +1,30 @@ +from drf_spectacular.utils import extend_schema +from rest_framework import filters + +from app.models import Subject +from app.serializers.subject import SubjectSerializer +from app.pagination import StandardResultsSetPagination + +from rest_framework.viewsets import ReadOnlyModelViewSet + + +class SubjectViewSet(ReadOnlyModelViewSet): + lookup_value_regex = "[^/]+" + serializer_class = SubjectSerializer + pagination_class = StandardResultsSetPagination + filter_backends = [filters.OrderingFilter, filters.SearchFilter] + ordering_fields = "__all__" + ordering = ["-orcabus_id"] + search_fields = Subject.get_base_fields() + queryset = Subject.objects.none() + + @extend_schema(parameters=[ + SubjectSerializer + ]) + def list(self, request, *args, **kwargs): + return super().list(request, *args, **kwargs) + + def get_queryset(self): + return Subject.objects.get_by_keyword(**self.request.query_params) + + diff --git a/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg b/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg index 03b4b421f..6d2643e06 100644 --- a/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg +++ b/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg @@ -1,4 +1,4 @@ -LibraryPKorcabus_idlibrary_idphenotypeworkflow qualitytypeassaycoverageproject_nameproject_ownerSubjectPKorcabus_idlab_subject_idexternal_subject_idSpecimenPKorcabus_idlab_specimen_idexternal_specimen_idsource \ No newline at end of file +LibraryPKorcabus_idlibrary_idphenotypeworkflow qualitytypeassaycoverageSubjectPKorcabus_idsubject_idexternal_subject_idSamplePKorcabus_idsample_idexternal_sample_idProjectOwnerPKproject_ownerProjectNamePKproject_name \ No newline at end of file diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py index 8e5fc9595..d6ed9c66c 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py @@ -1,21 +1,20 @@ import os import re import json -from typing import List import pandas as pd import numpy as np from django.db import transaction -from libumccr import libgdrive, libjson +from libumccr import libgdrive from libumccr.aws import libssm import logging -from app.models import Subject, Specimen, Library -from app.models.lab.library import Quality, LibraryType, Phenotype, WorkflowType -from app.models.lab.specimen import Source -from app.models.lab.utils import 
get_value_from_human_readable_label +from app.models import Subject, Sample, Library, Project, Contact +from app.models.library import Quality, LibraryType, Phenotype, WorkflowType +from app.models.sample import Source +from app.models.utils import get_value_from_human_readable_label  from proc.service.utils import clean_model_history  logger = logging.getLogger() @@ -41,9 +40,9 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): library_created = list() library_updated = list() library_deleted = list() - specimen_created = list() - specimen_updated = list() - specimen_deleted = list() + sample_created = list() + sample_updated = list() + sample_deleted = list() subject_created = list() subject_updated = list() subject_deleted = list() @@ -52,7 +51,7 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str):  # The data frame is to be the source of truth for the particular year # So we need to remove db records which are not in the data frame - # Only doing this for library records and (dangling) specimen/subject may be removed on a separate process + # Only doing this for library records and (dangling) sample/subject may be removed on a separate process  # For the library_id we need to craft the library_id prefix to match the year # E.g. year 2024, library_id prefix is 'L24' as per the Lab tracking sheet convention @@ -62,40 +61,6 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): library_deleted.append(lib) lib.delete()  - # Update: 12/07/2024. 'Subject' -> 'Specimen' is now ONE to Many, therefore the process of unlinking the many to - # many is not needed. The following code is commented for future reference when the 'Individual' concept is - # introduced (which will have many-to-many as 'Individual' <-> 'Subject').  - # # removing relation of spc <-> sbj when needed as this is the many-to-many relationship - # # adding relation between specimen and subject could be done per library records - # # but removal will need all records to consider before the removing - # # this `spc_sbj_df` will convert mapping between `sample_id` to all related `subject_id` as list - # spc_sbj_df = df.loc[:, df.columns.isin(['sample_id', 'subject_id'])] \ - # .groupby('sample_id')['subject_id'] \ - # .apply(list) \ - # .reset_index(name='subject_id_list') - # - # for record in spc_sbj_df.to_dict('records'): - # specimen_id = record.get("sample_id") - # subject_id_list = record.get("subject_id_list") - # - # try: - # spc = Specimen.objects.get(specimen_id=specimen_id) - # for sbj in spc.subjects.all().iterator(): - # if sbj.subject_id not in subject_id_list: - # spc.subjects.remove(sbj) - # - # except ObjectDoesNotExist: - # pass - # - # ... added below...
- # - # # specimen <-> subject (addition only) - # try: - # specimen.subjects.get(orcabus_id=subject.orcabus_id) - # except ObjectDoesNotExist: - # specimen.subjects.add(subject) - # this the where records are updated, inserted, linked based on library_id for record in df.to_dict('records'): try: @@ -103,7 +68,8 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): subject, is_sub_created, is_sub_updated = Subject.objects.update_or_create_if_needed( search_key={"subject_id": record.get('subject_id')}, data={ - "subject_id": record.get('subject_id') + "subject_id": record.get('subject_id'), + "external_subject_id": record.get('external_subject_id'), } ) if is_sub_created: @@ -111,18 +77,33 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): if is_sub_updated: subject_updated.append(subject) - specimen, is_spc_created, is_spc_updated = Specimen.objects.update_or_create_if_needed( - search_key={"specimen_id": record.get('sample_id')}, + sample, is_smp_created, is_smp_updated = Sample.objects.update_or_create_if_needed( + search_key={"sample_id": record.get('sample_id')}, data={ - "specimen_id": record.get('sample_id'), + "sample_id": record.get('sample_id'), + "external_sample_id": record.get('external_sample_id'), "source": get_value_from_human_readable_label(Source.choices, record.get('source')), - 'subject_id': subject.orcabus_id } ) - if is_spc_created: - specimen_created.append(specimen) - if is_spc_updated: - specimen_updated.append(specimen) + if is_smp_created: + sample_created.append(sample) + if is_smp_updated: + sample_updated.append(sample) + + contact, _is_cnt_created, _is_cnt_updated = Contact.objects.update_or_create_if_needed( + search_key={"contact_id": record.get('project_owner')}, + data={ + "contact_id": record.get('project_owner'), + } + ) + + project, _is_prj_created, _is_prj_updated = Project.objects.update_or_create_if_needed( + search_key={"project_id": record.get('project_name')}, + data={ + "project_id": record.get('project_name'), + "contact_id": contact.orcabus_id, + } + ) library, is_lib_created, is_lib_updated = Library.objects.update_or_create_if_needed( search_key={"library_id": record.get('library_id')}, @@ -134,9 +115,12 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): 'type': get_value_from_human_readable_label(LibraryType.choices, record.get('type')), 'assay': record.get('assay'), 'coverage': sanitize_library_coverage(record.get('coverage')), - 'specimen_id': specimen.orcabus_id, - 'project_owner': record.get('project_owner'), - 'project_name': record.get('project_name'), + + # relationships + 'sample_id': sample.orcabus_id, + 'subject_id': subject.orcabus_id, + 'project_id': project.orcabus_id, + } ) if is_lib_created: @@ -144,21 +128,9 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): if is_lib_updated: library_updated.append(library) - # 2. 
linking or updating model to each other based on the record (update if it does not match) - - # library <-> specimen - if library.specimen is None or library.specimen.orcabus_id != specimen.orcabus_id: - library.specimen = specimen - library.save() - - # specimen <-> subject - if specimen.subject is None or specimen.subject.orcabus_id != subject.orcabus_id: - specimen.subject = subject - specimen.save() - except Exception as e: if any(record.values()): # silent off blank row - logger.warning(f"Invalid record ({e}): {json.dumps(record, indent=2)}") + print(f"Invalid record ({e}): {json.dumps(record, indent=2)}") rows_invalid.append(record) continue @@ -171,10 +143,10 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): "update_count": len(library_updated), "delete_count": len(library_deleted) }, - "specimen": { - "new_count": len(specimen_created), - "update_count": len(specimen_updated), - "delete_count": len(specimen_deleted) + "sample": { + "new_count": len(sample_created), + "update_count": len(sample_updated), + "delete_count": len(sample_deleted) }, "subject": { From bcf2887118680243c5bb02c5a9544a1f549f4d61 Mon Sep 17 00:00:00 2001 From: william Date: Wed, 18 Sep 2024 22:35:04 +1000 Subject: [PATCH 05/11] Model Update ! --- .../stacks/metadata-manager/README.md | 2 +- .../management/commands/insert_mock_data.py | 4 +- .../app/migrations/0001_initial.py | 174 ++++++++++++----- .../metadata-manager/app/models/__init__.py | 1 + .../metadata-manager/app/models/base.py | 14 +- .../metadata-manager/app/models/contact.py | 21 ++- .../metadata-manager/app/models/individual.py | 14 +- .../metadata-manager/app/models/library.py | 12 +- .../metadata-manager/app/models/project.py | 19 +- .../metadata-manager/app/models/sample.py | 9 +- .../metadata-manager/app/models/subject.py | 18 +- .../app/serializers/__init__.py | 2 + .../metadata-manager/app/serializers/base.py | 11 ++ .../app/serializers/contact.py | 7 +- .../app/serializers/individual.py | 17 ++ .../app/serializers/library.py | 15 +- .../app/serializers/project.py | 10 +- .../app/serializers/sample.py | 7 +- .../app/serializers/subject.py | 19 +- .../metadata-manager/app/tests/factories.py | 61 ++++-- .../metadata-manager/app/tests/test_base.py | 6 +- .../metadata-manager/app/tests/test_models.py | 89 +++++---- .../app/tests/test_viewsets.py | 69 +------ .../metadata-manager/app/tests/utils.py | 24 ++- .../stacks/metadata-manager/app/urls/base.py | 3 +- .../metadata-manager/app/viewsets/__init__.py | 1 + .../metadata-manager/app/viewsets/base.py | 26 +++ .../metadata-manager/app/viewsets/contact.py | 12 +- .../app/viewsets/individual.py | 22 +++ .../metadata-manager/app/viewsets/library.py | 21 +-- .../metadata-manager/app/viewsets/project.py | 15 +- .../metadata-manager/app/viewsets/sample.py | 12 +- .../metadata-manager/app/viewsets/subject.py | 23 +-- .../metadata-manager/docs/schema.drawio.svg | 2 +- .../proc/service/tracking_sheet_srv.py | 175 ++++++++++++------ .../metadata-manager/proc/service/utils.py | 9 +- .../proc/tests/test_tracking_sheet_srv.py | 157 +++++++++++----- 37 files changed, 692 insertions(+), 411 deletions(-) create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/serializers/base.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/serializers/individual.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/viewsets/base.py create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/viewsets/individual.py diff --git 
a/lib/workload/stateless/stacks/metadata-manager/README.md b/lib/workload/stateless/stacks/metadata-manager/README.md index 3f639a572..2143035ea 100644 --- a/lib/workload/stateless/stacks/metadata-manager/README.md +++ b/lib/workload/stateless/stacks/metadata-manager/README.md @@ -62,7 +62,7 @@ from the Google tracking sheet and mapping it to its respective model as follows | Sheet Header | Table | Field Name | |-------------------|------------|----------------------| | SubjectID | `Subject` | lab_subject_id | -| ExternalSubjectID | `Subject` | external_subject_id | +| ExternalSubjectID | `Subject` | subject_id | | SampleID | `Specimen` | sample_id | | ExternalSampleID | `Specimen` | external_specimen_id | | Source | `Specimen` | source | diff --git a/lib/workload/stateless/stacks/metadata-manager/app/management/commands/insert_mock_data.py b/lib/workload/stateless/stacks/metadata-manager/app/management/commands/insert_mock_data.py index efc8e6403..1dc1df82c 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/management/commands/insert_mock_data.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/management/commands/insert_mock_data.py @@ -3,7 +3,7 @@ from django.core.management import BaseCommand from proc.service.tracking_sheet_srv import sanitize_lab_metadata_df, persist_lab_metadata -from proc.tests.test_tracking_sheet_srv import RECORD_1, RECORD_2, RECORD_3 +from proc.tests.test_tracking_sheet_srv import RECORD_1, RECORD_2, RECORD_3, SHEET_YEAR class Command(BaseCommand): @@ -16,7 +16,7 @@ def handle(self, *args, **options): metadata_pd = pd.json_normalize(mock_sheet_data) metadata_pd = sanitize_lab_metadata_df(metadata_pd) - result = persist_lab_metadata(metadata_pd) + result = persist_lab_metadata(metadata_pd, SHEET_YEAR) print(json.dumps(result, indent=4)) print("insert mock data completed") diff --git a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py index 719beb565..fe3745c16 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1 on 2024-09-13 08:19 +# Generated by Django 5.1 on 2024-09-18 11:57 import django.core.validators import django.db.models.deletion @@ -19,31 +19,34 @@ class Migration(migrations.Migration): migrations.CreateModel( name='Contact', fields=[ - ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), ('contact_id', models.CharField(blank=True, null=True, unique=True)), + ('name', models.CharField(blank=True, null=True)), + ('description', models.CharField(blank=True, null=True)), + ('email', models.EmailField(blank=True, max_length=254, null=True)), ], options={ 'abstract': False, }, ), migrations.CreateModel( - name='Sample', + name='Individual', fields=[ - ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, 
validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), - ('sample_id', models.CharField(blank=True, null=True, unique=True)), - ('external_sample_id', models.CharField(blank=True, null=True)), - ('source', models.CharField(blank=True, choices=[('ascites', 'Ascites'), ('blood', 'Blood'), ('bone-marrow', 'BoneMarrow'), ('buccal', 'Buccal'), ('cell-line', 'Cell_line'), ('cfDNA', 'Cfdna'), ('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True)), + ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), + ('individual_id', models.CharField(blank=True, null=True, unique=True)), + ('source', models.CharField(blank=True, null=True)), ], options={ 'abstract': False, }, ), migrations.CreateModel( - name='Subject', + name='Sample', fields=[ - ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), - ('subject_id', models.CharField(blank=True, null=True, unique=True)), - ('external_subject_id', models.CharField(blank=True, null=True)), + ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), + ('sample_id', models.CharField(blank=True, null=True, unique=True)), + ('external_sample_id', models.CharField(blank=True, null=True)), + ('source', models.CharField(blank=True, choices=[('ascites', 'Ascites'), ('blood', 'Blood'), ('bone-marrow', 'BoneMarrow'), ('buccal', 'Buccal'), ('cell-line', 'Cell_line'), ('cfDNA', 'Cfdna'), ('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True)), ], options={ 'abstract': False, @@ -52,8 +55,11 @@ class Migration(migrations.Migration): migrations.CreateModel( name='HistoricalContact', fields=[ - ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), ('contact_id', models.CharField(blank=True, db_index=True, null=True)), + ('name', models.CharField(blank=True, null=True)), + ('description', models.CharField(blank=True, null=True)), + ('email', 
models.EmailField(blank=True, max_length=254, null=True)), ('history_id', models.AutoField(primary_key=True, serialize=False)), ('history_date', models.DateTimeField(db_index=True)), ('history_change_reason', models.CharField(max_length=100, null=True)), @@ -68,16 +74,63 @@ class Migration(migrations.Migration): }, bases=(simple_history.models.HistoricalChanges, models.Model), ), + migrations.CreateModel( + name='HistoricalIndividual', + fields=[ + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), + ('individual_id', models.CharField(blank=True, db_index=True, null=True)), + ('source', models.CharField(blank=True, null=True)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'verbose_name': 'historical individual', + 'verbose_name_plural': 'historical individuals', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='HistoricalLibrary', + fields=[ + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), + ('library_id', models.CharField(blank=True, db_index=True, null=True)), + ('phenotype', models.CharField(blank=True, choices=[('normal', 'Normal'), ('tumor', 'Tumor'), ('negative-control', 'Negative Control')], null=True)), + ('workflow', models.CharField(blank=True, choices=[('clinical', 'Clinical'), ('research', 'Research'), ('qc', 'Qc'), ('control', 'Control'), ('bcl', 'Bcl'), ('manual', 'Manual')], null=True)), + ('quality', models.CharField(blank=True, choices=[('very-poor', 'VeryPoor'), ('poor', 'Poor'), ('good', 'Good'), ('borderline', 'Borderline')], null=True)), + ('type', models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('TSO-DNA', 'TSO_DNA'), ('TSO-RNA', 'TSO_RNA'), ('WGS', 'Wgs'), ('WTS', 'Wts'), ('other', 'Other')], null=True)), + ('assay', models.CharField(blank=True, null=True)), + ('coverage', models.FloatField(blank=True, null=True)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ('sample', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.sample')), + ], + options={ + 'verbose_name': 'historical library', + 
'verbose_name_plural': 'historical librarys', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), migrations.CreateModel( name='HistoricalProject', fields=[ - ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), ('project_id', models.CharField(blank=True, db_index=True, null=True)), + ('name', models.CharField(blank=True, null=True)), + ('description', models.CharField(blank=True, null=True)), ('history_id', models.AutoField(primary_key=True, serialize=False)), ('history_date', models.DateTimeField(db_index=True)), ('history_change_reason', models.CharField(max_length=100, null=True)), ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), - ('contact', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.contact')), ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), ], options={ @@ -91,7 +144,7 @@ class Migration(migrations.Migration): migrations.CreateModel( name='HistoricalSample', fields=[ - ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), ('sample_id', models.CharField(blank=True, db_index=True, null=True)), ('external_sample_id', models.CharField(blank=True, null=True)), ('source', models.CharField(blank=True, choices=[('ascites', 'Ascites'), ('blood', 'Blood'), ('bone-marrow', 'BoneMarrow'), ('buccal', 'Buccal'), ('cell-line', 'Cell_line'), ('cfDNA', 'Cfdna'), ('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True)), @@ -112,9 +165,8 @@ class Migration(migrations.Migration): migrations.CreateModel( name='HistoricalSubject', fields=[ - ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), ('subject_id', models.CharField(blank=True, db_index=True, null=True)), - ('external_subject_id', 
models.CharField(blank=True, null=True)), ('history_id', models.AutoField(primary_key=True, serialize=False)), ('history_date', models.DateTimeField(db_index=True)), ('history_change_reason', models.CharField(max_length=100, null=True)), @@ -132,9 +184,11 @@ class Migration(migrations.Migration): migrations.CreateModel( name='Project', fields=[ - ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), ('project_id', models.CharField(blank=True, null=True, unique=True)), - ('contact', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.contact')), + ('name', models.CharField(blank=True, null=True)), + ('description', models.CharField(blank=True, null=True)), + ('contact_set', models.ManyToManyField(blank=True, null=True, related_name='project_set', to='app.contact')), ], options={ 'abstract': False, @@ -143,7 +197,7 @@ class Migration(migrations.Migration): migrations.CreateModel( name='Library', fields=[ - ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), ('library_id', models.CharField(blank=True, null=True, unique=True)), ('phenotype', models.CharField(blank=True, choices=[('normal', 'Normal'), ('tumor', 'Tumor'), ('negative-control', 'Negative Control')], null=True)), ('workflow', models.CharField(blank=True, choices=[('clinical', 'Clinical'), ('research', 'Research'), ('qc', 'Qc'), ('control', 'Control'), ('bcl', 'Bcl'), ('manual', 'Manual')], null=True)), @@ -151,40 +205,74 @@ class Migration(migrations.Migration): ('type', models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('TSO-DNA', 'TSO_DNA'), ('TSO-RNA', 'TSO_RNA'), ('WGS', 'Wgs'), ('WTS', 'Wts'), ('other', 'Other')], null=True)), ('assay', models.CharField(blank=True, null=True)), ('coverage', models.FloatField(blank=True, null=True)), - ('project', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.project')), + ('project_set', models.ManyToManyField(blank=True, null=True, related_name='library_set', to='app.project')), ('sample', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.sample')), - ('subject', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.subject')), ], options={ 'abstract': False, }, ), migrations.CreateModel( - name='HistoricalLibrary', + name='HistoricalProject_contact_set', 
fields=[ - ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), - ('library_id', models.CharField(blank=True, db_index=True, null=True)), - ('phenotype', models.CharField(blank=True, choices=[('normal', 'Normal'), ('tumor', 'Tumor'), ('negative-control', 'Negative Control')], null=True)), - ('workflow', models.CharField(blank=True, choices=[('clinical', 'Clinical'), ('research', 'Research'), ('qc', 'Qc'), ('control', 'Control'), ('bcl', 'Bcl'), ('manual', 'Manual')], null=True)), - ('quality', models.CharField(blank=True, choices=[('very-poor', 'VeryPoor'), ('poor', 'Poor'), ('good', 'Good'), ('borderline', 'Borderline')], null=True)), - ('type', models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('TSO-DNA', 'TSO_DNA'), ('TSO-RNA', 'TSO_RNA'), ('WGS', 'Wgs'), ('WTS', 'Wts'), ('other', 'Other')], null=True)), - ('assay', models.CharField(blank=True, null=True)), - ('coverage', models.FloatField(blank=True, null=True)), - ('history_id', models.AutoField(primary_key=True, serialize=False)), - ('history_date', models.DateTimeField(db_index=True)), - ('history_change_reason', models.CharField(max_length=100, null=True)), - ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), - ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), - ('project', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.project')), - ('sample', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.sample')), - ('subject', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.subject')), + ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), + ('m2m_history_id', models.AutoField(primary_key=True, serialize=False)), + ('contact', models.ForeignKey(blank=True, db_constraint=False, db_tablespace='', null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.contact')), + ('history', models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='app.historicalproject')), + ('project', models.ForeignKey(blank=True, db_constraint=False, db_tablespace='', null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.project')), ], options={ - 'verbose_name': 'historical library', - 'verbose_name_plural': 'historical librarys', - 'ordering': ('-history_date', '-history_id'), - 'get_latest_by': ('history_date', 'history_id'), + 'verbose_name': 'HistoricalProject_contact_set', + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='HistoricalLibrary_project_set', + fields=[ + ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), + ('m2m_history_id', models.AutoField(primary_key=True, serialize=False)), + ('history', 
models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='app.historicallibrary')), + ('library', models.ForeignKey(blank=True, db_constraint=False, db_tablespace='', null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.library')), + ('project', models.ForeignKey(blank=True, db_constraint=False, db_tablespace='', null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.project')), + ], + options={ + 'verbose_name': 'HistoricalLibrary_project_set', }, bases=(simple_history.models.HistoricalChanges, models.Model), ), + migrations.CreateModel( + name='Subject', + fields=[ + ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), + ('subject_id', models.CharField(blank=True, null=True, unique=True)), + ('individual_set', models.ManyToManyField(blank=True, null=True, related_name='subject_set', to='app.individual')), + ], + options={ + 'abstract': False, + }, + ), + migrations.AddField( + model_name='library', + name='subject', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.subject'), + ), + migrations.CreateModel( + name='HistoricalSubject_individual_set', + fields=[ + ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), + ('m2m_history_id', models.AutoField(primary_key=True, serialize=False)), + ('history', models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='app.historicalsubject')), + ('individual', models.ForeignKey(blank=True, db_constraint=False, db_tablespace='', null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.individual')), + ('subject', models.ForeignKey(blank=True, db_constraint=False, db_tablespace='', null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.subject')), + ], + options={ + 'verbose_name': 'HistoricalSubject_individual_set', + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.AddField( + model_name='historicallibrary', + name='subject', + field=models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.subject'), + ), ] diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/__init__.py b/lib/workload/stateless/stacks/metadata-manager/app/models/__init__.py index 7af8375e8..6eb02e89f 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/__init__.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/__init__.py @@ -5,3 +5,4 @@ from .subject import Subject from .contact import Contact from .project import Project +from .individual import Individual diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/base.py b/lib/workload/stateless/stacks/metadata-manager/app/models/base.py index 658fabd78..5d20354f0 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/base.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/base.py @@ -20,16 +20,15 @@ QuerySet, ) from rest_framework.settings import api_settings -from simple_history.models import HistoricalRecords - from app.pagination import PaginationConstant logger = logging.getLogger(__name__) class BaseManager(models.Manager): - def 
get_by_keyword(self, **kwargs) -> QuerySet: - qs: QuerySet = super().get_queryset() + def get_by_keyword(self, qs=None, **kwargs) -> QuerySet: + if qs is None: + qs = super().get_queryset() return self.get_model_fields_query(qs, **kwargs) @staticmethod @@ -119,15 +118,18 @@ class Meta: null=False, validators=[ RegexValidator( - regex=r'^[\w]{3}\.[\w]{26}$', - message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', + regex=r'[\w]{26}$', + message='ULID is expected to be 26 characters long', code='invalid_orcabus_id' )] ) def save(self, *args, **kwargs): + if not self.orcabus_id: + self.orcabus_id = ulid.new().str self.full_clean() + return super(BaseModel, self).save(*args, **kwargs) @classmethod diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/contact.py b/lib/workload/stateless/stacks/metadata-manager/app/models/contact.py index fa6cac6a5..b3e08e77f 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/contact.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/contact.py @@ -1,4 +1,3 @@ -import ulid from django.db import models from simple_history.models import HistoricalRecords @@ -10,7 +9,7 @@ class ContactManager(BaseManager): class Contact(BaseModel): - orcabus_id_prefix = 'cnt' + orcabus_id_prefix = 'ctc.' objects = ContactManager() contact_id = models.CharField( @@ -18,10 +17,18 @@ class Contact(BaseModel): blank=True, null=True ) + name = models.CharField( + blank=True, + null=True + ) + description = models.CharField( + blank=True, + null=True + ) + email = models.EmailField( + blank=True, + null=True + ) + # history history = HistoricalRecords() - - def save(self, *args, **kwargs): - if not self.orcabus_id: - self.orcabus_id = self.orcabus_id_prefix + '.' + ulid.new().str - super().save(*args, **kwargs) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/individual.py b/lib/workload/stateless/stacks/metadata-manager/app/models/individual.py index 53192adfa..f6f68586b 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/individual.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/individual.py @@ -1,4 +1,3 @@ -import ulid from django.db import models from simple_history.models import HistoricalRecords @@ -10,17 +9,18 @@ class IndividualManager(BaseManager): class Individual(BaseModel): - orcabus_id_prefix = 'idv' + orcabus_id_prefix = 'idv.' objects = IndividualManager() - history = HistoricalRecords() individual_id = models.CharField( unique=True, blank=True, null=True ) + source = models.CharField( + blank=True, + null=True + ) - def save(self, *args, **kwargs): - if not self.orcabus_id: - self.orcabus_id = self.orcabus_id_prefix + '.' + ulid.new().str - super().save(*args, **kwargs) + # history + history = HistoricalRecords() diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/library.py b/lib/workload/stateless/stacks/metadata-manager/app/models/library.py index 332346e81..3252f0636 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/library.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/library.py @@ -1,6 +1,5 @@ import logging -import ulid from django.db import models from simple_history.models import HistoricalRecords @@ -56,9 +55,8 @@ class LibraryManager(BaseManager): class Library(BaseModel): - orcabus_id_prefix = 'lib' + orcabus_id_prefix = 'lib.' 
objects = LibraryManager() - history = HistoricalRecords() library_id = models.CharField( unique=True, @@ -97,9 +95,7 @@ class Library(BaseModel): # Relationships sample = models.ForeignKey(Sample, on_delete=models.SET_NULL, blank=True, null=True) subject = models.ForeignKey(Subject, on_delete=models.SET_NULL, blank=True, null=True) - project = models.ForeignKey(Project, on_delete=models.SET_NULL, blank=True, null=True) + project_set = models.ManyToManyField(Project, related_name='library_set', blank=True, null=True) - def save(self, *args, **kwargs): - if not self.orcabus_id: - self.orcabus_id = self.orcabus_id_prefix + '.' + ulid.new().str - super().save(*args, **kwargs) + # history + history = HistoricalRecords(m2m_fields=[project_set]) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/project.py b/lib/workload/stateless/stacks/metadata-manager/app/models/project.py index 3ad11fc05..de16bc4fa 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/project.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/project.py @@ -11,20 +11,25 @@ class ProjectManager(BaseManager): class Project(BaseModel): - orcabus_id_prefix = 'prj' + orcabus_id_prefix = 'prj.' objects = ProjectManager() - history = HistoricalRecords() project_id = models.CharField( unique=True, blank=True, null=True ) + name = models.CharField( + blank=True, + null=True + ) + description = models.CharField( + blank=True, + null=True + ) # Relationships - contact = models.ForeignKey(Contact, on_delete=models.SET_NULL, blank=True, null=True) + contact_set = models.ManyToManyField(Contact, related_name='project_set', blank=True, null=True) - def save(self, *args, **kwargs): - if not self.orcabus_id: - self.orcabus_id = self.orcabus_id_prefix + '.' + ulid.new().str - super().save(*args, **kwargs) + # history + history = HistoricalRecords(m2m_fields=[contact_set]) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/sample.py b/lib/workload/stateless/stacks/metadata-manager/app/models/sample.py index 8b79c07d7..f6cc6a248 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/sample.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/sample.py @@ -32,9 +32,8 @@ class SampleManager(BaseManager): class Sample(BaseModel): - orcabus_id_prefix = 'smp' + orcabus_id_prefix = 'smp.' objects = SampleManager() - history = HistoricalRecords() sample_id = models.CharField( unique=True, @@ -47,7 +46,5 @@ class Sample(BaseModel): ) source = models.CharField(choices=Source.choices, blank=True, null=True) - def save(self, *args, **kwargs): - if not self.orcabus_id: - self.orcabus_id = self.orcabus_id_prefix + '.' + ulid.new().str - super().save(*args, **kwargs) + # history + history = HistoricalRecords() diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py b/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py index 2e38882dd..1d9afad07 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py @@ -1,4 +1,3 @@ -import ulid from django.db import models from simple_history.models import HistoricalRecords @@ -10,21 +9,16 @@ class SubjectManager(BaseManager): class Subject(BaseModel): - orcabus_id_prefix = 'sbj' + orcabus_id_prefix = 'sbj.' 
objects = SubjectManager() - history = HistoricalRecords() - subject_id = models.CharField( unique=True, blank=True, null=True ) - external_subject_id = models.CharField( - blank=True, - null=True - ) - def save(self, *args, **kwargs): - if not self.orcabus_id: - self.orcabus_id = self.orcabus_id_prefix + '.' + ulid.new().str - super().save(*args, **kwargs) + # Relationships + individual_set = models.ManyToManyField('Individual', related_name='subject_set', blank=True, null=True) + + # history + history = HistoricalRecords(m2m_fields=[individual_set]) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/__init__.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/__init__.py index e69de29bb..43d293bdb 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/serializers/__init__.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/__init__.py @@ -0,0 +1,2 @@ +from .subject import SubjectSerializer +from .individual import IndividualSerializer diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/base.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/base.py new file mode 100644 index 000000000..d372527a6 --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/base.py @@ -0,0 +1,11 @@ +from rest_framework import serializers + + +class SerializersBase(serializers.ModelSerializer): + prefix = '' + + def to_representation(self, instance): + representation = super().to_representation(instance) + representation['orcabus_id'] = self.prefix + str(representation['orcabus_id']) + return representation + diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/contact.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/contact.py index f94da911e..f62583d6a 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/serializers/contact.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/contact.py @@ -1,9 +1,10 @@ -from rest_framework import serializers - from app.models import Contact +from .base import SerializersBase + +class ContactSerializer(SerializersBase): + prefix = Contact.orcabus_id_prefix -class ContactSerializer(serializers.ModelSerializer): class Meta: model = Contact fields = "__all__" diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/individual.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/individual.py new file mode 100644 index 000000000..ad91fe26d --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/individual.py @@ -0,0 +1,17 @@ +from app.models import Individual +from .base import SerializersBase + + +class IndividualSerializer(SerializersBase): + prefix = Individual.orcabus_id_prefix + + class Meta: + model = Individual + fields = '__all__' + + +class IndividualDetailSerializer(IndividualSerializer): + from .subject import SubjectSerializer + + subject_set = SubjectSerializer(many=True, read_only=True) + diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/library.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/library.py index a3248083f..71b60a632 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/serializers/library.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/library.py @@ -1,9 +1,18 @@ -from rest_framework import serializers - from app.models import Library +from .base import SerializersBase +from .project import ProjectSerializer 
+from .sample import SampleSerializer +from .subject import SubjectSerializer + + +class LibrarySerializer(SerializersBase): + prefix = Library.orcabus_id_prefix + + project_set = ProjectSerializer(many=True, read_only=True) + sample = SampleSerializer(read_only=True) + subject = SubjectSerializer(read_only=True) -class LibrarySerializer(serializers.ModelSerializer): class Meta: model = Library fields = "__all__" diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/project.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/project.py index c991be8c7..2e2f8d264 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/serializers/project.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/project.py @@ -1,9 +1,13 @@ -from rest_framework import serializers - +from .base import SerializersBase from app.models import Project +from .contact import ContactSerializer + + +class ProjectSerializer(SerializersBase): + prefix = Project.orcabus_id_prefix + contact_set = ContactSerializer(many=True, read_only=True) -class ProjectSerializer(serializers.ModelSerializer): class Meta: model = Project fields = "__all__" diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/sample.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/sample.py index b889ba461..01480c555 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/serializers/sample.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/sample.py @@ -1,9 +1,10 @@ -from rest_framework import serializers - +from .base import SerializersBase from app.models import Sample -class SampleSerializer(serializers.ModelSerializer): +class SampleSerializer(SerializersBase): + prefix = Sample.orcabus_id_prefix + class Meta: model = Sample fields = "__all__" diff --git a/lib/workload/stateless/stacks/metadata-manager/app/serializers/subject.py b/lib/workload/stateless/stacks/metadata-manager/app/serializers/subject.py index e9a437dcc..6fee4a85f 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/serializers/subject.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/serializers/subject.py @@ -1,9 +1,20 @@ -from rest_framework import serializers - from app.models import Subject +from .base import SerializersBase + + +class SubjectSerializer(SerializersBase): + prefix = Subject.orcabus_id_prefix + + class Meta: + model = Subject + exclude = ["individual_set"] -class SubjectSerializer(serializers.ModelSerializer): +class SubjectDetailSerializer(SubjectSerializer): + from .individual import IndividualSerializer + class Meta: model = Subject - fields = "__all__" + fields = '__all__' + + individual_set = IndividualSerializer(many=True, read_only=True) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py b/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py index c97f0242c..cf50fc900 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py @@ -1,19 +1,19 @@ import factory -from app.models import Subject, Specimen, Library +from app.models import Subject, Sample, Library, Project, Contact, Individual INDIVIDUAL_1 = { - "individual_id": "I001" + "individual_id": "SBJ001", + "source": "lab" } SUBJECT_1 = { - "lab_subject_id": "SBJ001", - "external_subject_id": "EXT_SUB_ID_A" + "subject_id": "XS-0001", } -SPECIMEN_1 = { - "lab_specimen_id": "PRJ001", - "external_specimen_id": 
"EXT_SPC_ID_A", +SAMPLE_1 = { + "sample_id": "PRJ001", + "external_sample_id": "EXT_SPM_ID_A", "source": "FFPE" } @@ -29,22 +29,39 @@ 'project_name': 'test_project' } +PROJECT_1 = { + 'project_id': 'prj-01', + 'name': 'test_project' +} + +CONTACT_1 = { + 'contact_id': 'doe-01', + 'name': 'doe', +} + + +class IndividualFactory(factory.django.DjangoModelFactory): + class Meta: + model = Individual + + individual_id = INDIVIDUAL_1['individual_id'] + source = INDIVIDUAL_1['source'] + class SubjectFactory(factory.django.DjangoModelFactory): class Meta: model = Subject - lab_subject_id = SUBJECT_1['lab_subject_id'] - external_subject_id = SUBJECT_1['external_subject_id'] + subject_id = SUBJECT_1['subject_id'] -class SpecimenFactory(factory.django.DjangoModelFactory): +class SampleFactory(factory.django.DjangoModelFactory): class Meta: - model = Specimen + model = Sample - lab_specimen_id = SPECIMEN_1['lab_specimen_id'] - external_specimen_id = SPECIMEN_1['external_specimen_id'] - source = SPECIMEN_1['source'] + sample_id = SAMPLE_1['sample_id'] + external_sample_id = SAMPLE_1['external_sample_id'] + source = SAMPLE_1['source'] class LibraryFactory(factory.django.DjangoModelFactory): @@ -58,3 +75,19 @@ class Meta: type = LIBRARY_1["type"] assay = LIBRARY_1["assay"] coverage = LIBRARY_1["coverage"] + + +class ProjectFactory(factory.django.DjangoModelFactory): + class Meta: + model = Project + + project_id = PROJECT_1['project_id'] + name = PROJECT_1['name'] + + +class ContactFactory(factory.django.DjangoModelFactory): + class Meta: + model = Contact + + contact_id = CONTACT_1['contact_id'] + name = CONTACT_1['name'] diff --git a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_base.py b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_base.py index 3e45c2770..14e3a0f67 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_base.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_base.py @@ -15,7 +15,7 @@ def setUp(self) -> None: def test_reduce_multi_values_qor(self): """ - python manage.py tests app.tests.test_base.BaseManagerTestCase.test_reduce_multi_values_qor + python manage.py test app.tests.test_base.BaseManagerTestCase.test_reduce_multi_values_qor """ q = BaseManager.reduce_multi_values_qor( "subject_id", ["SBJ000001", "SBJ000002"] @@ -27,7 +27,7 @@ def test_reduce_multi_values_qor(self): def test_reduce_multi_values_qor_auto_pack(self): """ - python manage.py tests app.tests.test_base.BaseManagerTestCase.test_reduce_multi_values_qor_auto_pack + python manage.py test app.tests.test_base.BaseManagerTestCase.test_reduce_multi_values_qor_auto_pack """ q = BaseManager.reduce_multi_values_qor("subject_id", "SBJ000001") logger.info(q) @@ -37,7 +37,7 @@ def test_reduce_multi_values_qor_auto_pack(self): def test_base_model_must_abstract(self): """ - python manage.py tests app.tests.test_base.BaseManagerTestCase.test_base_model_must_abstract + python manage.py test app.tests.test_base.BaseManagerTestCase.test_base_model_must_abstract """ try: BaseModel() diff --git a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py index 3dc3125dd..a6146cdaa 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py @@ -1,72 +1,71 @@ import logging from django.test import TestCase -import ulid -from app.models import Subject, Specimen, Library +from 
app.models import Subject, Sample, Library, Contact, Project, Individual +from .factories import LIBRARY_1, SAMPLE_1, INDIVIDUAL_1, SUBJECT_1, PROJECT_1, CONTACT_1 +from .utils import insert_mock_1 logger = logging.getLogger() logger.setLevel(logging.INFO) -class MetadataTestCase(TestCase): +class ModelTestCase(TestCase): def setUp(self): - subject = Subject.objects.create( - orcabus_id=f'sbj.{ulid.new().str}', - lab_subject_id='SBJ001', - ) - subject.full_clean() - subject.save() - - specimen = Specimen.objects.create( - lab_specimen_id='SPC001', - subject=subject, - ) - specimen.full_clean() - specimen.save() - - library = Library.objects.create( - library_id='L001', - phenotype='negative-control', - workflow='clinical', - quality='poor', - type='WTS', - assay='NebRNA', - coverage='6.3', - specimen=specimen, - project_name='test_project', - project_owner='test_owner', - ) - library.full_clean() - library.save() + insert_mock_1() def test_get_simple_model(self): """ - python manage.py test app.tests.test_models.MetadataTestCase.test_get_simple_model + python manage.py test app.tests.test_models.ModelTestCase.test_get_simple_model """ logger.info("Test get on simple lab models") - lib_one = Library.objects.get(library_id="L001") - self.assertEqual(lib_one.library_id, "L001", "incorrect 'id' from given internal library id") + lib_one = Library.objects.get(library_id=LIBRARY_1['library_id'], ) + self.assertEqual(lib_one.library_id, LIBRARY_1['library_id'], "incorrect 'id' from given internal library id") - spc_one = Specimen.objects.get(lab_specimen_id="SPC001") - self.assertEqual(spc_one.lab_specimen_id, "SPC001", "incorrect 'id' from given internal specimen id") + spc_one = Sample.objects.get(sample_id=SAMPLE_1['sample_id'], ) + self.assertEqual(spc_one.sample_id, SAMPLE_1['sample_id'], "incorrect 'id' from given internal sample id") - sub_one = Subject.objects.get(lab_subject_id="SBJ001") - self.assertEqual(sub_one.lab_subject_id, "SBJ001", "incorrect 'id' from given internal subject id") + sub_one = Subject.objects.get(subject_id=SUBJECT_1['subject_id'], ) + self.assertEqual(sub_one.subject_id, SUBJECT_1['subject_id'], + "incorrect 'id' from subject_id") + + cnt_one = Contact.objects.get(contact_id=CONTACT_1['contact_id'], ) + self.assertEqual(cnt_one.contact_id, CONTACT_1['contact_id'], "incorrect 'id' from given internal contact id") + + idv_one = Individual.objects.get(individual_id=INDIVIDUAL_1['individual_id'], ) + self.assertEqual(idv_one.individual_id, INDIVIDUAL_1['individual_id'], + "incorrect 'id' from given internal individual id") + + prj_one = Project.objects.get(project_id=PROJECT_1['project_id'], ) + self.assertEqual(prj_one.project_id, PROJECT_1['project_id'], "incorrect 'id' from given internal project id") def test_metadata_model_relationship(self): """ - python manage.py test app.tests.test_models.MetadataTestCase.test_metadata_model_relationship + python manage.py test app.tests.test_models.ModelTestCase.test_metadata_model_relationship """ logger.info("Test the relationship model within the lab metadata") - lib_one = Library.objects.get(library_id="L001") + lib_one = Library.objects.get(library_id=LIBRARY_1['library_id']) - # find the linked specimen - spc_one = lib_one.specimen - self.assertEqual(spc_one.lab_specimen_id, "SPC001", "incorrect specimen 'id' should linked to library") + # find the linked sample + smp_one = lib_one.sample + self.assertEqual(smp_one.sample_id, SAMPLE_1['sample_id'], "incorrect sample 'id' should linked to library") # find the linked 
subject - sub_one = spc_one.subject - self.assertEqual(sub_one.lab_subject_id, "SBJ001", "incorrect subject 'id' linked to specimen") + sub_one = lib_one.subject + self.assertEqual(sub_one.subject_id, SUBJECT_1['subject_id'], + "incorrect subject 'id' linked to sample") + + # find the linked individual + idv_one = sub_one.individual_set.get(individual_id=INDIVIDUAL_1['individual_id']) + self.assertEqual(idv_one.individual_id, INDIVIDUAL_1['individual_id'], + "incorrect individual 'id' linked to subject") + + # find the linked project + prj_one = lib_one.project_set.get(project_id=PROJECT_1['project_id']) + self.assertEqual(prj_one.project_id, PROJECT_1['project_id'], "incorrect project 'id' linked to library") + + # find the linked contact + cnt_one = prj_one.contact_set.get(contact_id=CONTACT_1['contact_id']) + self.assertEqual(cnt_one.contact_id, CONTACT_1['contact_id'], "incorrect contact 'id' linked to project") diff --git a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py index 58f35bbdd..15a7a457c 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py @@ -2,7 +2,7 @@ from django.test import TestCase -from app.tests.factories import LIBRARY_1, SUBJECT_1, SPECIMEN_1 +from app.tests.factories import LIBRARY_1, SUBJECT_1, SAMPLE_1 from app.tests.utils import insert_mock_1 logger = logging.getLogger() @@ -21,40 +21,39 @@ def test_get_api(self): """ python manage.py test app.tests.test_viewsets.LabViewSetTestCase.test_get_api """ - # Get sequence list model_to_check = [ { "path": "library", "props": LIBRARY_1, - "lab_id_key": "library_id" + "id_key": "library_id" }, { - "path": "specimen", - "props": SPECIMEN_1, - "lab_id_key": "lab_specimen_id" + "path": "sample", + "props": SAMPLE_1, + "id_key": "sample_id" }, { "path": "subject", "props": SUBJECT_1, - "lab_id_key": "lab_subject_id" + "id_key": "subject_id" } ] for model in model_to_check: path_id = model['path'] - lab_id_key = model['lab_id_key'] + id_key = model['id_key'] path = version_endpoint(path_id) logger.info(f"check API path for '{path}'") response = self.client.get(f"/{path}/") self.assertEqual(response.status_code, 200, "Ok status response is expected") - print(lab_id_key) + result_response = response.data["results"] self.assertGreater(len(result_response), 0, "A result is expected") logger.debug("Check if unique data has a single entry") - response = self.client.get(f"/{path}/?{lab_id_key}={model['props'][lab_id_key]}") + response = self.client.get(f"/{path}/?{id_key}={model['props'][id_key]}") results_response = response.data["results"] self.assertEqual( len(results_response), 1, "Single result is expected for unique data" @@ -69,53 +68,3 @@ def test_get_api(self): "No results are expected for unrecognized query parameter", ) - def test_library_full_model_api(self): - """ - python manage.py test app.tests.test_viewsets.LabViewSetTestCase.test_library_full_model_api - """ - path = version_endpoint('library/full') - - logger.info(f"check API path for '{path}'") - response = self.client.get(f"/{path}/") - self.assertEqual(response.status_code, 200, - "Ok status response is expected") - - result_response = response.data["results"] - self.assertGreater(len(result_response), 0, "A result is expected") - - logger.debug("Check if unique data has a single entry") - response = 
self.client.get(f"/{path}/?library_id={LIBRARY_1['library_id']}") - results_response = response.data["results"] - self.assertEqual( - len(results_response), 1, "Single result is expected for unique data" - ) - - logger.debug("check if specimen and library are linked") - self.assertEqual(result_response[0]['specimen']['lab_specimen_id'], SPECIMEN_1["lab_specimen_id"], ) - self.assertEqual(result_response[0]['specimen']['subject']['lab_subject_id'], SUBJECT_1["lab_subject_id"], ) - - def test_subject_full_model_api(self): - """ - python manage.py test app.tests.test_viewsets.LabViewSetTestCase.test_subject_full_model_api - """ - path = version_endpoint('subject/full') - - logger.info(f"check API path for '{path}'") - response = self.client.get(f"/{path}/") - self.assertEqual(response.status_code, 200, - "Ok status response is expected") - - result_response = response.data["results"] - self.assertGreater(len(result_response), 0, "A result is expected") - - logger.debug("Check if unique data has a single entry") - response = self.client.get(f"/{path}/?lab_subject_id={SUBJECT_1['lab_subject_id']}") - results_response = response.data["results"] - self.assertEqual( - len(results_response), 1, "Single result is expected for unique data" - ) - - logger.debug("check if specimen and library are linked") - self.assertEqual(result_response[0]['specimen_set'][0]['lab_specimen_id'], SPECIMEN_1["lab_specimen_id"], ) - self.assertEqual(result_response[0]['specimen_set'][0]['library_set'][0]['library_id'], - LIBRARY_1["library_id"], ) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/tests/utils.py b/lib/workload/stateless/stacks/metadata-manager/app/tests/utils.py index 99fbab835..2875b4395 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/tests/utils.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/tests/utils.py @@ -1,12 +1,16 @@ -from app.models import Subject, Specimen, Library -from app.tests.factories import LibraryFactory, SubjectFactory, SpecimenFactory +from app.models import Subject, Sample, Library, Project, Contact, Individual +from app.tests.factories import LibraryFactory, IndividualFactory, SubjectFactory, SampleFactory, \ + ProjectFactory, ContactFactory def clear_all_data(): """This function clear all existing models objcet""" Library.objects.all().delete() - Specimen.objects.all().delete() + Sample.objects.all().delete() Subject.objects.all().delete() + Project.objects.all().delete() + Contact.objects.all().delete() + Individual.objects.all().delete() def insert_mock_1(): @@ -16,12 +20,18 @@ def insert_mock_1(): clear_all_data() library = LibraryFactory() - specimen = SpecimenFactory() + sample = SampleFactory() subject = SubjectFactory() + contact = ContactFactory() + project = ProjectFactory() + individual = IndividualFactory() # Linking - library.specimen = specimen + project.contact_set.add(contact) + library.sample = sample + library.subject = subject + library.project_set.add(project) library.save() - specimen.subject = subject - specimen.save() + subject.individual_set.add(individual) + subject.save() diff --git a/lib/workload/stateless/stacks/metadata-manager/app/urls/base.py b/lib/workload/stateless/stacks/metadata-manager/app/urls/base.py index 7318308c8..39bd58c4c 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/urls/base.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/urls/base.py @@ -1,7 +1,7 @@ from django.urls import path, include from app.routers import OptionalSlashDefaultRouter -from app.viewsets import 
LibraryViewSet, SubjectViewSet, SampleViewSet, ProjectViewSet, ContactViewSet +from app.viewsets import LibraryViewSet, SubjectViewSet, SampleViewSet, ProjectViewSet, ContactViewSet, IndividualViewSet from app.settings.base import API_VERSION api_namespace = "api" @@ -9,6 +9,7 @@ api_base = f"{api_namespace}/{api_version}/" router = OptionalSlashDefaultRouter() +router.register(r"individual", IndividualViewSet, basename="individual") router.register(r"subject", SubjectViewSet, basename="subject") router.register(r"sample", SampleViewSet, basename="sample") router.register(r"library", LibraryViewSet, basename="library") diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/__init__.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/__init__.py index a6a2772b6..e2245adce 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/__init__.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/__init__.py @@ -3,3 +3,4 @@ from .subject import SubjectViewSet from .project import ProjectViewSet from .contact import ContactViewSet +from .individual import IndividualViewSet diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/base.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/base.py new file mode 100644 index 000000000..eb45124a4 --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/base.py @@ -0,0 +1,26 @@ +from abc import ABC +from rest_framework import filters +from django.shortcuts import get_object_or_404 +from app.pagination import StandardResultsSetPagination +from rest_framework.response import Response +from rest_framework.viewsets import ReadOnlyModelViewSet + + +class BaseViewSet(ReadOnlyModelViewSet, ABC): + orcabus_id_prefix = '' + ordering_fields = "__all__" + ordering = ["-orcabus_id"] + pagination_class = StandardResultsSetPagination + filter_backends = [filters.OrderingFilter, filters.SearchFilter] + + def retrieve(self, request, *args, **kwargs): + """ + Since we have custom orcabus_id prefix for each model, we need to remove the prefix before retrieving it. 
+ """ + pk = self.kwargs.get('pk') + if pk and pk.startswith(self.orcabus_id_prefix): + pk = pk[len(self.orcabus_id_prefix):] + + obj = get_object_or_404(self.queryset, pk=pk) + serializer = self.serializer_class(obj) + return Response(serializer.data) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/contact.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/contact.py index ee30c345f..a8d2bef6d 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/contact.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/contact.py @@ -5,18 +5,14 @@ from app.serializers.contact import ContactSerializer from app.pagination import StandardResultsSetPagination -from rest_framework.viewsets import ReadOnlyModelViewSet +from .base import BaseViewSet -class ContactViewSet(ReadOnlyModelViewSet): - lookup_value_regex = "[^/]+" +class ContactViewSet(BaseViewSet): serializer_class = ContactSerializer - pagination_class = StandardResultsSetPagination - filter_backends = [filters.OrderingFilter, filters.SearchFilter] - ordering_fields = "__all__" - ordering = ["-orcabus_id"] search_fields = Contact.get_base_fields() - queryset = Contact.objects.none() + queryset = Contact.objects.all() + orcabus_id_prefix = Contact.orcabus_id_prefix @extend_schema(parameters=[ ContactSerializer diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/individual.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/individual.py new file mode 100644 index 000000000..8087c9791 --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/individual.py @@ -0,0 +1,22 @@ +from drf_spectacular.utils import extend_schema + +from app.models import Individual +from app.serializers.individual import IndividualDetailSerializer + +from .base import BaseViewSet + + +class IndividualViewSet(BaseViewSet): + serializer_class = IndividualDetailSerializer + search_fields = Individual.get_base_fields() + queryset = Individual.objects.prefetch_related('subject_set').all() + orcabus_id_prefix = Individual.orcabus_id_prefix + + @extend_schema(parameters=[ + IndividualDetailSerializer + ]) + def list(self, request, *args, **kwargs): + return super().list(request, *args, **kwargs) + + def get_queryset(self): + return Individual.objects.get_by_keyword(self.queryset, **self.request.query_params) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/library.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/library.py index 9749956dd..67597da07 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/library.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/library.py @@ -1,25 +1,18 @@ from drf_spectacular.utils import extend_schema -from rest_framework import filters - from app.models import Library from app.serializers.library import LibrarySerializer -from app.pagination import StandardResultsSetPagination -from rest_framework.viewsets import ReadOnlyModelViewSet +from .base import BaseViewSet -class LibraryViewSet(ReadOnlyModelViewSet): - lookup_value_regex = "[^/]+" +class LibraryViewSet(BaseViewSet): serializer_class = LibrarySerializer - pagination_class = StandardResultsSetPagination - filter_backends = [filters.OrderingFilter, filters.SearchFilter] - ordering_fields = "__all__" - ordering = ["-orcabus_id"] search_fields = Library.get_base_fields() - queryset = Library.objects.none() + queryset = 
Library.objects.select_related('sample').select_related('subject').prefetch_related('project_set').all() + orcabus_id_prefix = Library.orcabus_id_prefix def get_queryset(self): - qs = Library.objects.all() + qs = self.queryset query_params = self.request.query_params.copy() coverage__lte = query_params.get("coverage__lte", None) @@ -33,12 +26,10 @@ def get_queryset(self): qs = qs.filter(coverage__gte=coverage__gte) # Continue filtering by the keys inside the library model - return Library.objects.get_model_fields_query(qs, **query_params) + return Library.objects.get_by_keyword(qs, **query_params) @extend_schema(parameters=[ LibrarySerializer ]) def list(self, request, *args, **kwargs): return super().list(request, *args, **kwargs) - - diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/project.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/project.py index 79091ec49..ca36598f6 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/project.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/project.py @@ -1,22 +1,15 @@ from drf_spectacular.utils import extend_schema -from rest_framework import filters from app.models import Project from app.serializers.project import ProjectSerializer -from app.pagination import StandardResultsSetPagination -from rest_framework.viewsets import ReadOnlyModelViewSet +from .base import BaseViewSet -class ProjectViewSet(ReadOnlyModelViewSet): - lookup_value_regex = "[^/]+" +class ProjectViewSet(BaseViewSet): serializer_class = ProjectSerializer - pagination_class = StandardResultsSetPagination - filter_backends = [filters.OrderingFilter, filters.SearchFilter] - ordering_fields = "__all__" - ordering = ["-orcabus_id"] search_fields = Project.get_base_fields() - queryset = Project.objects.none() + queryset = Project.objects.prefetch_related("contact_set").all() @extend_schema(parameters=[ ProjectSerializer @@ -25,4 +18,4 @@ def list(self, request, *args, **kwargs): return super().list(request, *args, **kwargs) def get_queryset(self): - return Project.objects.get_by_keyword(**self.request.query_params) + return Project.objects.get_by_keyword(self.queryset, **self.request.query_params) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/sample.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/sample.py index 0a8a9740e..d3952cbf2 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/sample.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/sample.py @@ -5,18 +5,14 @@ from app.serializers.sample import SampleSerializer from app.pagination import StandardResultsSetPagination -from rest_framework.viewsets import ReadOnlyModelViewSet +from .base import BaseViewSet -class SampleViewSet(ReadOnlyModelViewSet): - lookup_value_regex = "[^/]+" +class SampleViewSet(BaseViewSet): serializer_class = SampleSerializer - pagination_class = StandardResultsSetPagination - filter_backends = [filters.OrderingFilter, filters.SearchFilter] - ordering_fields = "__all__" - ordering = ["-orcabus_id"] search_fields = Sample.get_base_fields() - queryset = Sample.objects.none() + queryset = Sample.objects.all() + orcabus_id_prefix = Sample.orcabus_id_prefix @extend_schema(parameters=[ SampleSerializer diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/subject.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/subject.py index ea9a1b097..523af74fa 100644 --- 
a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/subject.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/subject.py @@ -2,29 +2,22 @@ from rest_framework import filters from app.models import Subject -from app.serializers.subject import SubjectSerializer +from app.serializers.subject import SubjectDetailSerializer from app.pagination import StandardResultsSetPagination +from .base import BaseViewSet -from rest_framework.viewsets import ReadOnlyModelViewSet - -class SubjectViewSet(ReadOnlyModelViewSet): - lookup_value_regex = "[^/]+" - serializer_class = SubjectSerializer - pagination_class = StandardResultsSetPagination - filter_backends = [filters.OrderingFilter, filters.SearchFilter] - ordering_fields = "__all__" - ordering = ["-orcabus_id"] +class SubjectViewSet(BaseViewSet): + serializer_class = SubjectDetailSerializer search_fields = Subject.get_base_fields() - queryset = Subject.objects.none() + queryset = Subject.objects.prefetch_related('individual_set').all() + orcabus_id_prefix = Subject.orcabus_id_prefix @extend_schema(parameters=[ - SubjectSerializer + SubjectDetailSerializer ]) def list(self, request, *args, **kwargs): return super().list(request, *args, **kwargs) def get_queryset(self): - return Subject.objects.get_by_keyword(**self.request.query_params) - - + return Subject.objects.get_by_keyword(self.queryset, **self.request.query_params) diff --git a/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg b/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg index 6d2643e06..0984839ec 100644 --- a/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg +++ b/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg @@ -1,4 +1,4 @@ -LibraryPKorcabus_idlibrary_idphenotypeworkflow qualitytypeassaycoverageSubjectPKorcabus_idsubject_idexternal_subject_idSamplePKorcabus_idsample_idexternal_sample_idProjectOwnerPKproject_ownerProjectNamePKproject_name \ No newline at end of file +LibraryPKorcabus_idlibrary_idphenotypeworkflow qualitytypeassaycoverageSubjectPKorcabus_idsubject_idsubject_idSamplePKorcabus_idsample_idexternal_sample_idProjectOwnerPKproject_ownerProjectNamePKproject_name \ No newline at end of file diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py index d6ed9c66c..8913c4e74 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py @@ -4,6 +4,7 @@ import pandas as pd import numpy as np +from django.core.exceptions import ObjectDoesNotExist from django.db import transaction from libumccr import libgdrive @@ -11,7 +12,7 @@ import logging -from app.models import Subject, Sample, Library,Project,Contact +from app.models import Subject, Sample, Library, Project, Contact, Individual from app.models.library import Quality, LibraryType, Phenotype, WorkflowType from app.models.sample import Source from app.models.utils import get_value_from_human_readable_label @@ -37,17 +38,40 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): logger.info(f"Start processing LabMetadata") # Used for statistics - library_created = list() - library_updated = list() - library_deleted = list() - sample_created = list() - sample_updated = list() - sample_deleted = list() - subject_created = list() - subject_updated = list() - subject_deleted = 
list() - - rows_invalid = list() + stats = { + "library": { + "create_count": 0, + "update_count": 0, + "delete_count": 0, + }, + "sample": { + "create_count": 0, + "update_count": 0, + "delete_count": 0, + + }, + "subject": { + "create_count": 0, + "update_count": 0, + "delete_count": 0, + }, + "individual": { + "create_count": 0, + "update_count": 0, + "delete_count": 0, + }, + "project": { + "create_count": 0, + "update_count": 0, + "delete_count": 0, + }, + "contact": { + "create_count": 0, + "update_count": 0, + "delete_count": 0, + }, + 'invalid_record_count': 0, + } # The data frame is to be the source of truth for the particular year # So we need to remove db records which are not in the data frame @@ -58,25 +82,61 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): library_prefix = f'L{sheet_year[-2:]}' for lib in Library.objects.filter(library_id__startswith=library_prefix).exclude( library_id__in=df['library_id'].tolist()).iterator(): - library_deleted.append(lib) + stats['library']['delete_count'] += 1 lib.delete() # this the where records are updated, inserted, linked based on library_id for record in df.to_dict('records'): try: # 1. update or create all data in the model from the given record + + # ------------------------------ + # Individual + # ------------------------------ + idv, is_idv_created, is_idv_updated = Individual.objects.update_or_create_if_needed( + search_key={ + "individual_id": record.get('subject_id'), + "source": "lab" + }, + data={ + "individual_id": record.get('subject_id'), + "source": "lab" + } + ) + if is_idv_created: + stats['individual']['create_count'] += 1 + if is_idv_updated: + stats['individual']['update_count'] += 1 + + # ------------------------------ + # Subject: We map the external_subject_id to the subject_id in the model + # ------------------------------ subject, is_sub_created, is_sub_updated = Subject.objects.update_or_create_if_needed( - search_key={"subject_id": record.get('subject_id')}, + search_key={"subject_id": record.get('external_subject_id')}, data={ - "subject_id": record.get('subject_id'), - "external_subject_id": record.get('external_subject_id'), + "subject_id": record.get('external_subject_id'), } ) + if is_sub_created: - subject_created.append(subject) + stats['subject']['create_count'] += 1 if is_sub_updated: - subject_updated.append(subject) - + stats['subject']['update_count'] += 1 + + # link individual to external subject + try: + subject.individual_set.get(orcabus_id=idv.orcabus_id) + except ObjectDoesNotExist: + subject.individual_set.add(idv) + + # We update the stats when new idv is linked to sbj, only if this is not recorded as + # update/create in previous upsert method + if not is_sub_created and not is_sub_updated: + stats['subject']['update_count'] += 1 + + # ------------------------------ + # Sample + # ------------------------------ sample, is_smp_created, is_smp_updated = Sample.objects.update_or_create_if_needed( search_key={"sample_id": record.get('sample_id')}, data={ @@ -86,25 +146,43 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): } ) if is_smp_created: - sample_created.append(sample) + stats['sample']['create_count'] += 1 if is_smp_updated: - sample_updated.append(sample) + stats['sample']['create_update'] += 1 - contact, _is_cnt_created, _is_cnt_updated = Contact.objects.update_or_create_if_needed( + # ------------------------------ + # Contact + # ------------------------------ + contact, _is_ctc_created, _is_ctc_updated = Contact.objects.update_or_create_if_needed( 
search_key={"contact_id": record.get('project_owner')}, data={ "contact_id": record.get('project_owner'), } ) - project, _is_prj_created, _is_prj_updated = Project.objects.update_or_create_if_needed( + # ------------------------------ + # Project: Upsert project with contact as part of the project + # ------------------------------ + project, is_prj_created, is_prj_updated = Project.objects.update_or_create_if_needed( search_key={"project_id": record.get('project_name')}, data={ "project_id": record.get('project_name'), - "contact_id": contact.orcabus_id, } ) - + # link project to its contact + try: + project.contact_set.get(orcabus_id=contact.orcabus_id) + except ObjectDoesNotExist: + project.contact_set.add(contact) + + # We update the stats when new ctc is linked to prj, only if this is not recorded as + # update/create in previous upsert method + if not is_prj_created and not is_prj_updated: + stats['project']['update_count'] += 1 + + # ------------------------------ + # Library: Upsert library record with related sample, subject, project + # ------------------------------ library, is_lib_created, is_lib_updated = Library.objects.update_or_create_if_needed( search_key={"library_id": record.get('library_id')}, data={ @@ -119,44 +197,35 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): # relationships 'sample_id': sample.orcabus_id, 'subject_id': subject.orcabus_id, - 'project_id': project.orcabus_id, - } ) if is_lib_created: - library_created.append(library) + stats['library']['create_count'] += 1 if is_lib_updated: - library_updated.append(library) + stats['library']['update_count'] += 1 + + # link library to its project + try: + library.project_set.get(orcabus_id=project.orcabus_id) + except ObjectDoesNotExist: + library.project_set.add(project) + + # We update the stats when new project is linked to library, only if this is not recorded as + # update/create in previous upsert method + if not is_lib_created and not is_lib_updated: + stats['library']['update_count'] += 1 except Exception as e: - if any(record.values()): # silent off blank row - print(f"Invalid record ({e}): {json.dumps(record, indent=2)}") - rows_invalid.append(record) + if any(record.values()): + logger.warning(f"Invalid record ({e}): {json.dumps(record, indent=2)}") + stats['invalid_record_count'] += 1 continue # clean up history for django-simple-history model if any - clean_model_history() + # Only clean for the past 15 minutes as this is what the maximum lambda cutoff + clean_model_history(minutes=15) - return { - "library": { - "new_count": len(library_created), - "update_count": len(library_updated), - "delete_count": len(library_deleted) - }, - "sample": { - "new_count": len(sample_created), - "update_count": len(sample_updated), - "delete_count": len(sample_deleted) - - }, - "subject": { - "new_count": len(subject_created), - "update_count": len(subject_updated), - "delete_count": len(subject_deleted) - - }, - 'invalid_record_count': len(rows_invalid), - } + return stats def download_tracking_sheet(year: str) -> pd.DataFrame: diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/service/utils.py b/lib/workload/stateless/stacks/metadata-manager/proc/service/utils.py index 42f85de2e..91b831a69 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/service/utils.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/service/utils.py @@ -6,14 +6,15 @@ logger.setLevel(logging.INFO) -def clean_model_history(): +def clean_model_history(minutes: int = None): """ The function 
will clean history for which where models have a history feature enabled When django uses the `save()` function, history table might be populated despite no changes (e.g. update_or_create). The history feature provided by django-simple-history track all signal that django sends to save model thus create duplicates. This clean function will remove these duplicates and only retain changes. + + Ref: https://django-simple-history.readthedocs.io/en/latest/utils.html """ - logger.info('removing duplicate history records') - call_command("clean_duplicate_history", "--auto", stdout=open(os.devnull, 'w')) - logger.info('duplicated history removed successfully') + logger.info(f'removing duplicate history records for the last {minutes} minutes if any') + call_command("clean_duplicate_history", "--auto", minutes=minutes, stdout=open(os.devnull, 'w')) diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py index c83cfe091..596731836 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py @@ -1,7 +1,7 @@ import pandas as pd from django.test import TestCase -from app.models import Library, Specimen, Subject +from app.models import Library, Sample, Subject, Project, Contact, Individual from proc.service.tracking_sheet_srv import sanitize_lab_metadata_df, persist_lab_metadata @@ -82,14 +82,6 @@ "SampleName": "PRJ10003-IN_RUN_2", "rRNA": "" } -RECORD_3_DIFF_SBJ = { - **RECORD_3, - "SubjectID": "SBJ004" -} -RECORD_3_DIFF_SPC = { - **RECORD_3_DIFF_SBJ, - "SampleID": "PRJ10004" -} class TrackingSheetSrvUnitTests(TestCase): @@ -110,15 +102,13 @@ def test_persist_lab_metadata(self): metadata_pd = sanitize_lab_metadata_df(metadata_pd) result = persist_lab_metadata(metadata_pd, SHEET_YEAR) + # Stats check self.assertEqual(result.get("invalid_record_count"), 0, "non invalid record should exist") - - self.assertEqual(result.get("library").get("new_count"), 3, "3 new library should be created") + self.assertEqual(result.get("library").get("create_count"), 3, "3 new library should be created") self.assertEqual(result.get("library").get("update_count"), 0, "0 update in library") - - self.assertEqual(result.get("specimen").get("new_count"), 2, "2 new specimen should be created") - self.assertEqual(result.get("specimen").get("update_count"), 0, "no update in specimen") - - self.assertEqual(result.get("subject").get("new_count"), 1, "1 new subject should be created") + self.assertEqual(result.get("sample").get("create_count"), 2, "2 new sample should be created") + self.assertEqual(result.get("sample").get("update_count"), 0, "no update in sample") + self.assertEqual(result.get("subject").get("create_count"), 1, "1 new subject should be created") self.assertEqual(result.get("subject").get("update_count"), 0, "no update in subject") lib_1 = Library.objects.get(library_id=RECORD_1.get("LibraryID")) @@ -126,31 +116,48 @@ def test_persist_lab_metadata(self): self.assertEqual(lib_1.phenotype, RECORD_1.get("Phenotype"), "incorrect value (Phenotype) stored") self.assertEqual(lib_1.assay, RECORD_1.get("Assay"), "incorrect value (Assay) stored") self.assertEqual(lib_1.workflow, RECORD_1.get("Workflow"), "incorrect value (Workflow) stored") - self.assertEqual(lib_1.project_owner, RECORD_1.get("ProjectOwner"), "incorrect value (ProjectOwner) stored") - 
self.assertEqual(lib_1.project_name, RECORD_1.get("ProjectName"), "incorrect value (ProjectName) stored") - self.assertEqual(lib_1.specimen.specimen_id, RECORD_1.get("SampleID"), "incorrect specimen linked") + self.assertEqual(lib_1.sample.sample_id, RECORD_1.get("SampleID"), "incorrect sample linked") - spc_1 = Specimen.objects.get(lab_specimen_id=RECORD_1.get("SampleID")) - self.assertIsNotNone(spc_1) - self.assertEqual(spc_1.source, RECORD_1.get("Source"), "incorrect value stored") - self.assertEqual(spc_1.external_specimen_id, RECORD_1.get("ExternalSampleID"), "incorrect value stored") + smp_1 = Sample.objects.get(sample_id=RECORD_1.get("SampleID")) + self.assertIsNotNone(smp_1) + self.assertEqual(smp_1.source, RECORD_1.get("Source"), "incorrect value stored") + self.assertEqual(smp_1.external_sample_id, RECORD_1.get("ExternalSampleID"), "incorrect value stored") - sbj_1 = Subject.objects.get(lab_subject_id=RECORD_1.get("SubjectID")) + sbj_1 = Subject.objects.get(subject_id=RECORD_1.get("ExternalSubjectID")) self.assertIsNotNone(sbj_1) - self.assertEqual(sbj_1.external_subject_id, RECORD_1.get("ExternalSubjectID"), "incorrect value stored") + self.assertEqual(sbj_1.subject_id, RECORD_1.get("ExternalSubjectID"), "incorrect value stored") + + idv_1 = Individual.objects.get(individual_id=RECORD_1.get("SubjectID")) + self.assertIsNotNone(idv_1) + self.assertEqual(idv_1.individual_id, RECORD_1.get("SubjectID"), "incorrect value stored") + + ctc_1 = Contact.objects.get(contact_id=RECORD_1.get("ProjectOwner")) + self.assertIsNotNone(ctc_1) + self.assertEqual(ctc_1.contact_id, RECORD_1.get("ProjectOwner"), "incorrect value (ProjectOwner) stored") - # check relationships if lib_1 and lib_2 is in the same spc_1 - spc_lib_qs = spc_1.library_set.all() - self.assertEqual(spc_lib_qs.filter(library_id=RECORD_1.get("LibraryID")).count(), 1, - "lib_1 and spc_1 is not linked") - self.assertEqual(spc_lib_qs.filter(library_id=RECORD_2.get("LibraryID")).count(), 1, - "lib_2 and spc_1 is not linked") + prj_1 = Project.objects.get(project_id=RECORD_1.get("ProjectName")) + self.assertIsNotNone(prj_1) + self.assertEqual(prj_1.project_id, RECORD_1.get("ProjectName"), "incorrect value (ProjectName) stored") - # check if all lib is the same with sbj_1 + # check all relationships from each record for rec in mock_sheet_data: lib = Library.objects.get(library_id=rec.get("LibraryID")) - self.assertEqual(lib.specimen.subject.lab_subject_id, RECORD_1.get("SubjectID"), - "library is not linked to the same subject") + + ext_sbj = lib.subject + self.assertEqual(ext_sbj.subject_id, rec.get("ExternalSubjectID"), + 'incorrect library-subject link') + + smp = lib.sample + self.assertEqual(smp.sample_id, rec.get("SampleID"), 'incorrect library-sample link') + + idv = ext_sbj.individual_set.get(individual_id=rec.get("SubjectID")) + self.assertEqual(idv.individual_id, rec.get("SubjectID"), 'incorrect subject-individual link') + + prj = lib.project_set.get(project_id=rec.get("ProjectName")) + self.assertEqual(prj.project_id, rec.get("ProjectName"), 'incorrect library-project link') + + ctc = prj.contact_set.get(contact_id=rec.get("ProjectOwner")) + self.assertEqual(ctc.contact_id, rec.get("ProjectOwner"), 'incorrect project-contact link') def test_new_df_in_different_year(self) -> None: """ @@ -177,36 +184,85 @@ def test_new_df_in_different_year(self) -> None: lib_change = Library.objects.get(library_id=new_lib_id) self.assertIsNotNone(lib_change) - - def test_persist_lab_metadata_alter_sbj(self): + def test_alter_sbj_smp(self): 
""" - test where lib moved to different spc, and spc to different sbj - + test where lib moved to different subject and sample - python manage.py test proc.tests.test_tracking_sheet_srv.TrackingSheetSrvUnitTests.test_persist_lab_metadata_alter_sbj + python manage.py test proc.tests.test_tracking_sheet_srv.TrackingSheetSrvUnitTests.test_alter_sbj_smp """ metadata_pd = pd.json_normalize([RECORD_3]) metadata_pd = sanitize_lab_metadata_df(metadata_pd) persist_lab_metadata(metadata_pd, SHEET_YEAR) - metadata_pd = pd.json_normalize([RECORD_3_DIFF_SBJ]) + lib_3 = Library.objects.get(library_id=RECORD_3['LibraryID']) + self.assertEqual(lib_3.sample.sample_id, RECORD_3['SampleID'], 'incorrect link between lib and smp') + self.assertEqual(lib_3.subject.subject_id, RECORD_3['ExternalSubjectID'], + 'incorrect link between lib and sbj') + idv_3 = lib_3.subject.individual_set.get(individual_id=RECORD_3['SubjectID']) + self.assertIsNotNone(idv_3) + self.assertEqual(lib_3.subject.individual_set.count(), 1, 'only 1 individual should be linked') + + # Change smp and sample + record_3_altered = { + **RECORD_3, + "ExternalSubjectID": "EXT_SBJ004", + "SampleID": "PRJ10004", + } + + metadata_pd = pd.json_normalize([record_3_altered]) metadata_pd = sanitize_lab_metadata_df(metadata_pd) persist_lab_metadata(metadata_pd, SHEET_YEAR) - sbj_4 = Subject.objects.get(lab_subject_id=RECORD_3_DIFF_SBJ['SubjectID']) - self.assertIsNotNone(sbj_4) - spc_4 = sbj_4.specimen_set.get(lab_specimen_id=RECORD_3_DIFF_SBJ['SampleID']) - self.assertEqual(spc_4.lab_specimen_id, RECORD_3_DIFF_SBJ['SampleID'], - 'specimen obj should not change on link update') + lib_3_altered = Library.objects.get(library_id=record_3_altered['LibraryID']) + self.assertEqual(lib_3_altered.sample.sample_id, record_3_altered['SampleID'], + 'incorrect link between lib and smp') + self.assertEqual(lib_3_altered.subject.subject_id, record_3_altered['ExternalSubjectID'], + 'incorrect link between lib and sbj') + idv_3 = lib_3_altered.subject.individual_set.all() + self.assertIsNotNone(idv_3) + self.assertEqual(lib_3_altered.subject.individual_set.count(), 1, 'only 1 individual should be linked') + + def test_alter_idv_prj_ctc(self): + """ + test where object is move betweeb many-to-many relationship (idv, prj, ctc) - metadata_pd = pd.json_normalize([RECORD_3_DIFF_SPC]) + python manage.py test proc.tests.test_tracking_sheet_srv.TrackingSheetSrvUnitTests.test_alter_idv_prj_ctc + """ + + metadata_pd = pd.json_normalize([RECORD_3]) metadata_pd = sanitize_lab_metadata_df(metadata_pd) persist_lab_metadata(metadata_pd, SHEET_YEAR) - lib_3 = Library.objects.get(library_id=RECORD_3['LibraryID']) - self.assertEqual(lib_3.specimen.lab_specimen_id, RECORD_3_DIFF_SPC['SampleID'], - 'incorrect link between lib and spc when changing links') + sbj_3 = Subject.objects.get(subject_id=RECORD_3['ExternalSubjectID']) + self.assertIsNotNone(sbj_3) + self.assertEqual(sbj_3.individual_set.count(), 1, 'only 1 individual should be linked') + idv_3 = sbj_3.individual_set.get(individual_id=RECORD_3['SubjectID']) + self.assertIsNotNone(idv_3) + + # Change individual id + record_3_altered = { + **RECORD_3, + "SubjectID": "SBJ004", + "ProjectOwner": "Doe", + "ProjectName": "test", + } + + metadata_pd = pd.json_normalize([record_3_altered]) + metadata_pd = sanitize_lab_metadata_df(metadata_pd) + persist_lab_metadata(metadata_pd, SHEET_YEAR) + + # We don't unlink previous many-to-many relationships, but only add new ones + sbj_3 = 
Subject.objects.get(subject_id=record_3_altered['ExternalSubjectID']) + idv_3 = sbj_3.individual_set.get(individual_id=record_3_altered['SubjectID']) + self.assertIsNotNone(idv_3) + + prj_3 = Library.objects.get(library_id=record_3_altered['LibraryID']).project_set.get( + project_id=record_3_altered['ProjectName']) + self.assertIsNotNone(prj_3) + + ctc_3 = prj_3.contact_set.get(contact_id=record_3_altered['ProjectOwner']) + self.assertIsNotNone(ctc_3) def test_with_deleted_model(self) -> None: """ @@ -219,7 +275,6 @@ def test_with_deleted_model(self) -> None: persist_lab_metadata(metadata_pd, SHEET_YEAR) mock_sheet_data = [RECORD_3] - metadata_pd = pd.json_normalize(mock_sheet_data) metadata_pd = sanitize_lab_metadata_df(metadata_pd) result = persist_lab_metadata(metadata_pd, SHEET_YEAR) @@ -242,6 +297,6 @@ def test_save_choice_from_human_readable_label(self) -> None: metadata_pd = sanitize_lab_metadata_df(metadata_pd) persist_lab_metadata(metadata_pd, SHEET_YEAR) - spc = Specimen.objects.get(lab_specimen_id=mock_record.get("SampleID")) + spc = Sample.objects.get(sample_id=mock_record.get("SampleID")) self.assertIsNotNone(spc) self.assertEqual(spc.source, 'water', "incorrect value stored") From 9d8a90171027802b9d5d673a7b0101f7eb413296 Mon Sep 17 00:00:00 2001 From: william Date: Wed, 18 Sep 2024 23:01:30 +1000 Subject: [PATCH 06/11] Readme Update --- .../stacks/metadata-manager/README.md | 54 +++++++++++-------- .../metadata-manager/docs/schema.drawio.svg | 2 +- .../proc/service/tracking_sheet_srv.py | 1 + 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/lib/workload/stateless/stacks/metadata-manager/README.md b/lib/workload/stateless/stacks/metadata-manager/README.md index 2143035ea..68f56b99b 100644 --- a/lib/workload/stateless/stacks/metadata-manager/README.md +++ b/lib/workload/stateless/stacks/metadata-manager/README.md @@ -37,19 +37,27 @@ to the URL: `.../library?libraryId=LIB001` ## Schema -This is the current (WIP) schema that reflects the current implementation. +This is the current (WIP) schema that reflects the current implementation. The schema is based on the +draft [draw.io in Google Drive](https://app.diagrams.net/#G10ryWSXORMo7Qj7ghvj37LHYqmMm4hXW-#%7B%22pageId%22%3A%22vfe626awnvWGlhOGvxTV%22%7D) +. ![schema](docs/schema.drawio.svg) To modify the diagram, open the `docs/schema.drawio.svg` with [diagrams.net](https://app.diagrams.net/?src=about). -`orcabus_id` is the unique identifier for each record in the database. It is generated by the application where the -first 3 characters are the model prefix followed by [ULID](https://pypi.org/project/ulid-py/) separated by a dot (.). -The prefix is as follows: +The `orcabus_id` serves as the unique identifier for each record in the database. It is generated by the application +using the [ULID](https://pypi.org/project/ulid-py/) library. When a record is accessed via the API, the `orcabus_id` +is presented with a prefix consisting of three characters followed by a dot (.). The specific prefix varies depending +on the model of the record. -- Library model are `lib` -- Specimen model are `spc` -- Subject model are `sbj` +| Model | Prefix | +|------------|--------| +| Subject | `sbj.` | +| Sample | `smp.` | +| Library | `lib.` | +| Individual | `idv.` | +| Contact | `ctc.` | +| Project | `prj.` | ## How things work @@ -59,22 +67,22 @@ In the near future, we might introduce different ways to load data into the appl loading data from the Google tracking sheet and mapping it to its respective model as follows. 
-| Sheet Header | Table | Field Name | -|-------------------|------------|----------------------| -| SubjectID | `Subject` | lab_subject_id | -| ExternalSubjectID | `Subject` | subject_id | -| SampleID | `Specimen` | sample_id | -| ExternalSampleID | `Specimen` | external_specimen_id | -| Source | `Specimen` | source | -| LibraryID | `Library` | library_id | -| Phenotype | `Library` | phenotype | -| Workflow | `Library` | workflow | -| Quality | `Library` | quality | -| Type | `Library` | type | -| Coverage (X) | `Library` | coverage | -| Assay | `Library` | assay | -| ProjectOwner | `Library` | project_owner | -| ProjectName | `Library` | project_name | +| Sheet Header | Table | Field Name | +|-------------------|--------------|--------------------| +| SubjectID | `Individual` | individual_id | +| ExternalSubjectID | `Subject` | subject_id | +| SampleID | `Sample` | sample_id | +| ExternalSampleID | `Sample` | external_sample_id | +| Source | `Sample` | source | +| LibraryID | `Library` | library_id | +| Phenotype | `Library` | phenotype | +| Workflow | `Library` | workflow | +| Quality | `Library` | quality | +| Type | `Library` | type | +| Coverage (X) | `Library` | coverage | +| Assay | `Library` | assay | +| ProjectName | `Project` | project_id | +| ProjectOwner | `Contact` | contact_id | Some important notes of the sync: diff --git a/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg b/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg index 0984839ec..5ca6d206e 100644 --- a/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg +++ b/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg @@ -1,4 +1,4 @@ -LibraryPKorcabus_idlibrary_idphenotypeworkflow qualitytypeassaycoverageSubjectPKorcabus_idsubject_idsubject_idSamplePKorcabus_idsample_idexternal_sample_idProjectOwnerPKproject_ownerProjectNamePKproject_name \ No newline at end of file +LibraryPKorcabus_idlibrary_idphenotypeworkflow qualitytypeassaycoverageSubjectPKorcabus_idsubject_idSamplePKorcabus_idsample_idexternal_sample_idsourceProjectPKorcabus_idproject_idnamedescriptionContactPKorcabus_idcontact_idnamedescriptionemailindividualPKorcabus_idindividual_idsource \ No newline at end of file diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py index 8913c4e74..aad78f4f8 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py @@ -76,6 +76,7 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): # The data frame is to be the source of truth for the particular year # So we need to remove db records which are not in the data frame # Only doing this for library records and (dangling) sample/subject may be removed on a separate process + # Note: We do not remove many-to-many relationships if current df has changed # For the library_id we need craft the library_id prefix to match the year # E.g. 
year 2024, library_id prefix is 'L24' as what the Lab tracking sheet convention From ef3984bb73cb1c65c0d77b914e2e0d53375f9c78 Mon Sep 17 00:00:00 2001 From: william Date: Thu, 19 Sep 2024 10:15:34 +1000 Subject: [PATCH 07/11] update --- .../stacks/metadata-manager/README.md | 18 ++++++++-------- .../app/management/commands/__init__.py | 3 +++ .../metadata-manager/app/models/project.py | 2 +- .../metadata-manager/app/models/subject.py | 2 +- .../proc/service/tracking_sheet_srv.py | 21 ++++++++++++++++--- 5 files changed, 32 insertions(+), 14 deletions(-) diff --git a/lib/workload/stateless/stacks/metadata-manager/README.md b/lib/workload/stateless/stacks/metadata-manager/README.md index 68f56b99b..559423a14 100644 --- a/lib/workload/stateless/stacks/metadata-manager/README.md +++ b/lib/workload/stateless/stacks/metadata-manager/README.md @@ -52,11 +52,11 @@ on the model of the record. | Model | Prefix | |------------|--------| -| Subject | `sbj.` | -| Sample | `smp.` | -| Library | `lib.` | +| Subject | `sbj.` | +| Sample | `smp.` | +| Library | `lib.` | | Individual | `idv.` | -| Contact | `ctc.` | +| Contact | `ctc.` | | Project | `prj.` | ## How things work @@ -88,11 +88,11 @@ Some important notes of the sync: 1. The sync will only run from the current year. 2. The tracking sheet is the single source of truth for the current year. Any deletion or update to existing records - will be applied based on their internal IDs (`library_id`, `specimen_id`, and `subject_id`). For the library + will be applied based on their internal IDs (e.g. `library_id`, `subject_id`, etc. ). For the library model, the deletion will only occur based on the current year's prefix. For example, syncing the 2024 tracking - sheet will only query libraries with `library_id` starting with `L24` to determine whether to delete it. -3. `LibraryId` is treated as a unique value in the tracking sheet, so for any duplicated value (including from other - tabs) it will only recognize the last appearance. + sheet will only query libraries with `library_id` tarting with `L24` to determine whether to delete it. +3. `LibraryId` is treated as a unique value in the tracking sheet, so for any duplicated value will only recognize + the last appearance. 4. In cases where multiple records share the same unique identifier (such as SampleId), only the data from the most recent record is stored. For instance, if a SampleId appears twice with differing source values, only the values from the latter record will be retained. 
@@ -122,7 +122,7 @@ python3 --version Python 3.12.2 ``` -You would need to go to thisps microservice app directory from the root project +You would need to go to this microservice app directory from the root project ```bash cd lib/workload/stateless/stacks/metadata-manager diff --git a/lib/workload/stateless/stacks/metadata-manager/app/management/commands/__init__.py b/lib/workload/stateless/stacks/metadata-manager/app/management/commands/__init__.py index e69de29bb..baaf7fc54 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/management/commands/__init__.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/management/commands/__init__.py @@ -0,0 +1,3 @@ +import os +os.environ['SSM_NAME_GDRIVE_ACCOUNT'] = "/umccr/google/drive/lims_service_account_json" +os.environ["SSM_NAME_TRACKING_SHEET_ID"] = "/umccr/google/drive/tracking_sheet_id" diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/project.py b/lib/workload/stateless/stacks/metadata-manager/app/models/project.py index de16bc4fa..85865f9b5 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/project.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/project.py @@ -29,7 +29,7 @@ class Project(BaseModel): ) # Relationships - contact_set = models.ManyToManyField(Contact, related_name='project_set', blank=True, null=True) + contact_set = models.ManyToManyField(Contact, related_name='project_set', blank=True, ) # history history = HistoricalRecords(m2m_fields=[contact_set]) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py b/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py index 1d9afad07..a857bf39b 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py @@ -18,7 +18,7 @@ class Subject(BaseModel): ) # Relationships - individual_set = models.ManyToManyField('Individual', related_name='subject_set', blank=True, null=True) + individual_set = models.ManyToManyField('Individual', related_name='subject_set', blank=True, ) # history history = HistoricalRecords(m2m_fields=[individual_set]) diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py index aad78f4f8..77f200d25 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py @@ -38,6 +38,7 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): logger.info(f"Start processing LabMetadata") # Used for statistics + invalid_data = [] stats = { "library": { "create_count": 0, @@ -149,17 +150,21 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): if is_smp_created: stats['sample']['create_count'] += 1 if is_smp_updated: - stats['sample']['create_update'] += 1 + stats['sample']['update_count'] += 1 # ------------------------------ # Contact # ------------------------------ - contact, _is_ctc_created, _is_ctc_updated = Contact.objects.update_or_create_if_needed( + contact, is_ctc_created, is_ctc_updated = Contact.objects.update_or_create_if_needed( search_key={"contact_id": record.get('project_owner')}, data={ "contact_id": record.get('project_owner'), } ) + if is_ctc_created: + stats['contact']['create_count'] += 1 + if is_ctc_updated: + stats['contact']['update_count'] += 1 # ------------------------------ # Project: 
Upsert project with contact as part of the project @@ -170,6 +175,11 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): "project_id": record.get('project_name'), } ) + if is_prj_created: + stats['project']['create_count'] += 1 + if is_prj_updated: + stats['project']['update_count'] += 1 + # link project to its contact try: project.contact_set.get(orcabus_id=contact.orcabus_id) @@ -218,14 +228,19 @@ def persist_lab_metadata(df: pd.DataFrame, sheet_year: str): except Exception as e: if any(record.values()): - logger.warning(f"Invalid record ({e}): {json.dumps(record, indent=2)}") stats['invalid_record_count'] += 1 + invalid_data.append({ + "reason": e, + "data": record + }) continue # clean up history for django-simple-history model if any # Only clean for the past 15 minutes as this is what the maximum lambda cutoff clean_model_history(minutes=15) + logger.warning(f"Invalid record: {invalid_data}") + logger.info(f"Processed LabMetadata: {json.dumps(stats)}") return stats From ddcb13516cc8ffbbecaf13a144ca31347a7de681 Mon Sep 17 00:00:00 2001 From: william Date: Thu, 19 Sep 2024 11:24:23 +1000 Subject: [PATCH 08/11] Update 0001_initial.py --- .../stacks/metadata-manager/app/migrations/0001_initial.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py index fe3745c16..b9e88ca85 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1 on 2024-09-18 11:57 +# Generated by Django 5.1 on 2024-09-19 01:23 import django.core.validators import django.db.models.deletion @@ -188,7 +188,7 @@ class Migration(migrations.Migration): ('project_id', models.CharField(blank=True, null=True, unique=True)), ('name', models.CharField(blank=True, null=True)), ('description', models.CharField(blank=True, null=True)), - ('contact_set', models.ManyToManyField(blank=True, null=True, related_name='project_set', to='app.contact')), + ('contact_set', models.ManyToManyField(blank=True, related_name='project_set', to='app.contact')), ], options={ 'abstract': False, @@ -245,7 +245,7 @@ class Migration(migrations.Migration): fields=[ ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='ULID is expected to be 26 characters long', regex='[\\w]{26}$')])), ('subject_id', models.CharField(blank=True, null=True, unique=True)), - ('individual_set', models.ManyToManyField(blank=True, null=True, related_name='subject_set', to='app.individual')), + ('individual_set', models.ManyToManyField(blank=True, related_name='subject_set', to='app.individual')), ], options={ 'abstract': False, From 978ce550a7f18a84747a359987a2585b3b6e1684 Mon Sep 17 00:00:00 2001 From: william Date: Thu, 19 Sep 2024 14:55:34 +1000 Subject: [PATCH 09/11] migration --- .../stacks/metadata-manager/app/migrations/0001_initial.py | 4 ++-- .../stateless/stacks/metadata-manager/app/models/library.py | 2 +- .../stateless/stacks/metadata-manager/app/models/project.py | 2 +- .../stateless/stacks/metadata-manager/app/models/subject.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py 
b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py index b9e88ca85..baf05eabb 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1 on 2024-09-19 01:23 +# Generated by Django 5.1 on 2024-09-19 04:49 import django.core.validators import django.db.models.deletion @@ -205,7 +205,7 @@ class Migration(migrations.Migration): ('type', models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('TSO-DNA', 'TSO_DNA'), ('TSO-RNA', 'TSO_RNA'), ('WGS', 'Wgs'), ('WTS', 'Wts'), ('other', 'Other')], null=True)), ('assay', models.CharField(blank=True, null=True)), ('coverage', models.FloatField(blank=True, null=True)), - ('project_set', models.ManyToManyField(blank=True, null=True, related_name='library_set', to='app.project')), + ('project_set', models.ManyToManyField(blank=True, related_name='library_set', to='app.project')), ('sample', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.sample')), ], options={ diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/library.py b/lib/workload/stateless/stacks/metadata-manager/app/models/library.py index 3252f0636..c04b95b95 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/library.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/library.py @@ -95,7 +95,7 @@ class Library(BaseModel): # Relationships sample = models.ForeignKey(Sample, on_delete=models.SET_NULL, blank=True, null=True) subject = models.ForeignKey(Subject, on_delete=models.SET_NULL, blank=True, null=True) - project_set = models.ManyToManyField(Project, related_name='library_set', blank=True, null=True) + project_set = models.ManyToManyField(Project, related_name='library_set', blank=True) # history history = HistoricalRecords(m2m_fields=[project_set]) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/project.py b/lib/workload/stateless/stacks/metadata-manager/app/models/project.py index 85865f9b5..559b66a9f 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/project.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/project.py @@ -29,7 +29,7 @@ class Project(BaseModel): ) # Relationships - contact_set = models.ManyToManyField(Contact, related_name='project_set', blank=True, ) + contact_set = models.ManyToManyField(Contact, related_name='project_set', blank=True) # history history = HistoricalRecords(m2m_fields=[contact_set]) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py b/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py index a857bf39b..12114b2b9 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/subject.py @@ -18,7 +18,7 @@ class Subject(BaseModel): ) # Relationships - individual_set = models.ManyToManyField('Individual', related_name='subject_set', blank=True, ) + individual_set = models.ManyToManyField('Individual', related_name='subject_set', blank=True) # history history = HistoricalRecords(m2m_fields=[individual_set]) From 0c262d55a96ea86a9d9aec7b48d5eb20adf52ff3 Mon Sep 17 00:00:00 2001 From: william Date: Thu, 19 Sep 2024 15:03:15 +1000 
Subject: [PATCH 10/11] add custom csv loader

---
 .../app/management/commands/load_from_csv.py  |  22 ++
 .../metadata-manager/app/models/library.py    |  14 +
 .../handler/load_custom_metadata_csv.py       |  35 +++
 .../handler/sync_tracking_sheet.py            |   4 +-
 .../proc/service/load_csv_srv.py              | 243 ++++++++++++++++++
 .../proc/service/tracking_sheet_srv.py        |  74 +-----
 .../metadata-manager/proc/service/utils.py    |  62 +++++
 7 files changed, 379 insertions(+), 75 deletions(-)
 create mode 100644 lib/workload/stateless/stacks/metadata-manager/app/management/commands/load_from_csv.py
 create mode 100644 lib/workload/stateless/stacks/metadata-manager/handler/load_custom_metadata_csv.py
 create mode 100644 lib/workload/stateless/stacks/metadata-manager/proc/service/load_csv_srv.py

diff --git a/lib/workload/stateless/stacks/metadata-manager/app/management/commands/load_from_csv.py b/lib/workload/stateless/stacks/metadata-manager/app/management/commands/load_from_csv.py
new file mode 100644
index 000000000..99017aa1c
--- /dev/null
+++ b/lib/workload/stateless/stacks/metadata-manager/app/management/commands/load_from_csv.py
@@ -0,0 +1,22 @@
+import logging
+from django.core.management import BaseCommand
+from libumccr import libjson
+
+from handler.load_custom_metadata_csv import handler
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+
+class Command(BaseCommand):
+    help = "Trigger the lambda handler to load metadata from a csv url"
+
+    def handle(self, *args, **options):
+        event = {
+            "url": "SOME_URL",
+        }
+
+        print(f"Trigger lambda handler to load metadata from csv. Event {libjson.dumps(event)}")
+        result = handler(event, {})
+
+        print(f"result: {libjson.dumps(result)}")
diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/library.py b/lib/workload/stateless/stacks/metadata-manager/app/models/library.py
index c04b95b95..6957b30a2 100644
--- a/lib/workload/stateless/stacks/metadata-manager/app/models/library.py
+++ b/lib/workload/stateless/stacks/metadata-manager/app/models/library.py
@@ -99,3 +99,17 @@ class Library(BaseModel):
 
     # history
     history = HistoricalRecords(m2m_fields=[project_set])
+
+
+def sanitize_library_coverage(value: str):
+    """
+    convert a value that is valid in the tracking sheet into a value that is recognizable by the Django model
+    """
+    try:
+        # make sure coverage is a float-able type
+        lib_coverage = float(value)
+        return f'{lib_coverage}'
+
+    except (ValueError, TypeError):
+        return None
+
diff --git a/lib/workload/stateless/stacks/metadata-manager/handler/load_custom_metadata_csv.py b/lib/workload/stateless/stacks/metadata-manager/handler/load_custom_metadata_csv.py
new file mode 100644
index 000000000..a54a7662a
--- /dev/null
+++ b/lib/workload/stateless/stacks/metadata-manager/handler/load_custom_metadata_csv.py
@@ -0,0 +1,35 @@
+import django
+import os
+import logging
+
+from libumccr import libjson
+
+from proc.service.utils import sanitize_lab_metadata_df, warn_drop_duplicated_library
+
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'app.settings.base')
+django.setup()
+
+from proc.service.load_csv_srv import load_metadata_csv, download_csv_to_pandas
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+
+def handler(event, _context):
+    logger.info(f'event: {libjson.dumps(event)}')
+
+    csv_url = event.get('url', None)
+    if csv_url is None:
+        raise ValueError("URL is required")
+
+    csv_df = download_csv_to_pandas(csv_url)
+    sanitize_df = sanitize_lab_metadata_df(csv_df)
+    duplicate_clean_df = warn_drop_duplicated_library(sanitize_df)
+    result =
load_metadata_csv(duplicate_clean_df) + + logger.info(f'persist report: {libjson.dumps(result)}') + return result + + +if __name__ == '__main__': + handler({}, {}) diff --git a/lib/workload/stateless/stacks/metadata-manager/handler/sync_tracking_sheet.py b/lib/workload/stateless/stacks/metadata-manager/handler/sync_tracking_sheet.py index 4a32eca47..b12bec70e 100644 --- a/lib/workload/stateless/stacks/metadata-manager/handler/sync_tracking_sheet.py +++ b/lib/workload/stateless/stacks/metadata-manager/handler/sync_tracking_sheet.py @@ -7,8 +7,8 @@ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'app.settings.base') django.setup() -from proc.service.tracking_sheet_srv import download_tracking_sheet, sanitize_lab_metadata_df, persist_lab_metadata, \ - warn_drop_duplicated_library +from proc.service.tracking_sheet_srv import download_tracking_sheet, persist_lab_metadata +from proc.service.utils import sanitize_lab_metadata_df, warn_drop_duplicated_library logger = logging.getLogger() logger.setLevel(logging.INFO) diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/service/load_csv_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/service/load_csv_srv.py new file mode 100644 index 000000000..a62178f41 --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/proc/service/load_csv_srv.py @@ -0,0 +1,243 @@ +import json +import logging +import pandas as pd +from django.core.exceptions import ObjectDoesNotExist +from django.db import transaction + +from app.models import Subject, Sample, Library, Project, Contact, Individual +from app.models.library import Quality, LibraryType, Phenotype, WorkflowType, sanitize_library_coverage +from app.models.sample import Source +from app.models.utils import get_value_from_human_readable_label +from proc.service.utils import clean_model_history + +logger = logging.getLogger() +logger.setLevel(logging.INFO) + + +@transaction.atomic +def load_metadata_csv(df: pd.DataFrame): + """ + Persist metadata records from a pandas dataframe into the db. No record deletion is performed in this method. + + Args: + df (pd.DataFrame): The source of truth for the metadata in this particular year + + """ + logger.info(f"Start processing LabMetadata") + + # Used for statistics + invalid_data = [] + stats = { + "library": { + "create_count": 0, + "update_count": 0, + }, + "sample": { + "create_count": 0, + "update_count": 0, + + }, + "subject": { + "create_count": 0, + "update_count": 0, + }, + "individual": { + "create_count": 0, + "update_count": 0, + }, + "project": { + "create_count": 0, + "update_count": 0, + }, + "contact": { + "create_count": 0, + "update_count": 0, + "delete_count": 0, + }, + 'invalid_record_count': 0, + } + + # this the where records are updated, inserted, linked based on library_id + for record in df.to_dict('records'): + try: + # 1. 
update or create all data in the model from the given record + + # ------------------------------ + # Individual + # ------------------------------ + idv = None + individual_id = record.get('individual_id') + source = record.get('source') + + if individual_id and source: + + idv, is_idv_created, is_idv_updated = Individual.objects.update_or_create_if_needed( + search_key={ + "individual_id": individual_id, + "source": source + }, + data={ + "individual_id": individual_id, + "source": source + } + ) + if is_idv_created: + stats['individual']['create_count'] += 1 + if is_idv_updated: + stats['individual']['update_count'] += 1 + + # ------------------------------ + # Subject + # ------------------------------ + + subject_id = record.get('subject_id') + subject, is_sub_created, is_sub_updated = Subject.objects.update_or_create_if_needed( + search_key={"subject_id": subject_id}, + data={ + "subject_id": subject_id, + } + ) + + if is_sub_created: + stats['subject']['create_count'] += 1 + if is_sub_updated: + stats['subject']['update_count'] += 1 + + if idv: + # link individual to external subject + try: + subject.individual_set.get(orcabus_id=idv.orcabus_id) + except ObjectDoesNotExist: + subject.individual_set.add(idv) + + # We update the stats when new idv is linked to sbj, only if this is not recorded as + # update/create in previous upsert method + if not is_sub_created and not is_sub_updated: + stats['subject']['update_count'] += 1 + + # ------------------------------ + # Sample + # ------------------------------ + sample = None + sample_id = record.get('sample_id') + if sample_id: + sample, is_smp_created, is_smp_updated = Sample.objects.update_or_create_if_needed( + search_key={"sample_id": sample_id}, + data={ + "sample_id": record.get('sample_id'), + "external_sample_id": record.get('external_sample_id'), + "source": get_value_from_human_readable_label(Source.choices, record.get('source')), + } + ) + if is_smp_created: + stats['sample']['create_count'] += 1 + if is_smp_updated: + stats['sample']['update_count'] += 1 + + # ------------------------------ + # Contact + # ------------------------------ + contact = None + contact_id = record.get('project_owner') + + if contact_id: + contact, is_ctc_created, is_ctc_updated = Contact.objects.update_or_create_if_needed( + search_key={"contact_id": record.get('project_owner')}, + data={ + "contact_id": record.get('project_owner'), + } + ) + if is_ctc_created: + stats['contact']['create_count'] += 1 + if is_ctc_updated: + stats['contact']['update_count'] += 1 + + # ------------------------------ + # Project: Upsert project with contact as part of the project + # ------------------------------ + project = None + + project_id = record.get('project_name') + if project_id: + project, is_prj_created, is_prj_updated = Project.objects.update_or_create_if_needed( + search_key={"project_id": record.get('project_name')}, + data={ + "project_id": record.get('project_name'), + } + ) + if is_prj_created: + stats['project']['create_count'] += 1 + if is_prj_updated: + stats['project']['update_count'] += 1 + + # link project to its contact of exist + if contact: + try: + project.contact_set.get(orcabus_id=contact.orcabus_id) + except ObjectDoesNotExist: + project.contact_set.add(contact) + + # We update the stats when new ctc is linked to prj, only if this is not recorded as + # update/create in previous upsert method + if not is_prj_created and not is_prj_updated: + stats['project']['update_count'] += 1 + + # ------------------------------ + # Library: Upsert 
library record with related sample, subject, project + # ------------------------------ + library, is_lib_created, is_lib_updated = Library.objects.update_or_create_if_needed( + search_key={"library_id": record.get('library_id')}, + data={ + 'library_id': record.get('library_id'), + 'phenotype': get_value_from_human_readable_label(Phenotype.choices, record.get('phenotype')), + 'workflow': get_value_from_human_readable_label(WorkflowType.choices, record.get('workflow')), + 'quality': get_value_from_human_readable_label(Quality.choices, record.get('quality')), + 'type': get_value_from_human_readable_label(LibraryType.choices, record.get('type')), + 'assay': record.get('assay'), + 'coverage': sanitize_library_coverage(record.get('coverage')), + + # relationships + 'sample_id': sample.orcabus_id, + 'subject_id': subject.orcabus_id, + } + ) + if is_lib_created: + stats['library']['create_count'] += 1 + if is_lib_updated: + stats['library']['update_count'] += 1 + + # link library to its project + if project: + try: + library.project_set.get(orcabus_id=project.orcabus_id) + except ObjectDoesNotExist: + library.project_set.add(project) + + # We update the stats when new project is linked to library, only if this is not recorded as + # update/create in previous upsert method + if not is_lib_created and not is_lib_updated: + stats['library']['update_count'] += 1 + + except Exception as e: + if any(record.values()): + stats['invalid_record_count'] += 1 + invalid_data.append({ + "reason": e, + "data": record + }) + continue + + # clean up history for django-simple-history model if any + # Only clean for the past 15 minutes as this is what the maximum lambda cutoff + clean_model_history(minutes=15) + + logger.warning(f"Invalid record: {invalid_data}") + logger.info(f"Processed LabMetadata: {json.dumps(stats)}") + return stats + + +def download_csv_to_pandas(url: str) -> pd.DataFrame: + """ + Download csv file from a given url and return it as a pandas dataframe + """ + return pd.read_csv(url) diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py index 77f200d25..399453094 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py @@ -1,9 +1,7 @@ import os -import re import json import pandas as pd -import numpy as np from django.core.exceptions import ObjectDoesNotExist from django.db import transaction @@ -13,7 +11,7 @@ import logging from app.models import Subject, Sample, Library, Project, Contact, Individual -from app.models.library import Quality, LibraryType, Phenotype, WorkflowType +from app.models.library import Quality, LibraryType, Phenotype, WorkflowType, sanitize_library_coverage from app.models.sample import Source from app.models.utils import get_value_from_human_readable_label from proc.service.utils import clean_model_history @@ -262,73 +260,3 @@ def download_tracking_sheet(year: str) -> pd.DataFrame: return df -def sanitize_lab_metadata_df(df: pd.DataFrame): - """ - sanitize record by renaming columns, and clean df cells - """ - - df = clean_columns(df) - df = df.map(_clean_data_cell) - - # dropping any rows that library_id == '' - df = df.drop(df[df.library_id.isnull()].index, errors='ignore') - - # dropping column that has empty column heading - df = df.drop('', axis='columns', errors='ignore') - - df = df.reset_index(drop=True) - return 
df - - -def warn_drop_duplicated_library(df: pd.DataFrame) -> pd.DataFrame: - """ - log warning messages if duplicated library_id found - """ - # some warning for duplicates - dup_lib_list = df[df.duplicated(subset=['library_id'], keep='last')]["library_id"].tolist() - if len(dup_lib_list) > 0: - logger.warning(f"data contain duplicate libraries: {', '.join(dup_lib_list)}") - - return df.drop_duplicates(subset=['library_id'], keep='last') - - -def clean_columns(df: pd.DataFrame) -> pd.DataFrame: - """ - clean a dataframe of labmetadata from a tracking sheet to correspond to the django object model - we do this by editing the columns to match the django object - """ - # remove unnamed - df = df.loc[:, ~df.columns.str.contains('^Unnamed')] - - # simplify verbose column names - df = df.rename(columns={'Coverage (X)': 'coverage', "TruSeq Index, unless stated": "truseqindex"}) - - # convert PascalCase headers to snake_case and fix ID going to _i_d - pattern = re.compile(r'(? pd.DataFrame: + """ + log warning messages if duplicated library_id found + """ + # some warning for duplicates + dup_lib_list = df[df.duplicated(subset=['library_id'], keep='last')]["library_id"].tolist() + if len(dup_lib_list) > 0: + logger.warning(f"data contain duplicate libraries: {', '.join(dup_lib_list)}") + + return df.drop_duplicates(subset=['library_id'], keep='last') + + +def clean_columns(df: pd.DataFrame) -> pd.DataFrame: + """ + clean a dataframe from a tracking sheet to correspond to the django object model + we do this by editing the columns to match the django object + """ + # remove unnamed + df = df.loc[:, ~df.columns.str.contains('^Unnamed')] + + # simplify verbose column names + df = df.rename(columns={'Coverage (X)': 'coverage', "TruSeq Index, unless stated": "truseqindex"}) + + # convert PascalCase headers to snake_case and fix ID going to _i_d + pattern = re.compile(r'(? Date: Thu, 19 Sep 2024 16:51:23 +1000 Subject: [PATCH 11/11] IaC for load custom csv lambda --- .../stacks/metadata-manager/README.md | 14 ++++- .../stacks/metadata-manager/deploy/README.md | 32 +++++++++++ .../construct/lambda-load-custom-csv/index.ts | 54 +++++++++++++++++++ .../lambda-load-custom-csv/lambda.Dockerfile | 12 +++++ .../docs/architecture.drawio.svg | 2 +- 5 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 lib/workload/stateless/stacks/metadata-manager/deploy/construct/lambda-load-custom-csv/index.ts create mode 100644 lib/workload/stateless/stacks/metadata-manager/deploy/construct/lambda-load-custom-csv/lambda.Dockerfile diff --git a/lib/workload/stateless/stacks/metadata-manager/README.md b/lib/workload/stateless/stacks/metadata-manager/README.md index 559423a14..c1c7f919e 100644 --- a/lib/workload/stateless/stacks/metadata-manager/README.md +++ b/lib/workload/stateless/stacks/metadata-manager/README.md @@ -61,7 +61,7 @@ on the model of the record. ## How things work -### How Syncing The Data Works +### How Tracking Sheet Syncing Works In the near future, we might introduce different ways to load data into the application. For the time being, we are loading data @@ -100,6 +100,18 @@ Some important notes of the sync: Please refer to the [tracking-sheet-service](proc/service/tracking_sheet_srv.py) implementation. +### Loading from external csv + +The Metadata Manager has the capability to import metadata from an external CSV file. This CSV file should follow the +same mapping structure as specified in the tracking sync process. 
The loading operation utilizes a presigned URL, which
+is subsequently used to load the data into the Metadata Manager. Not every header needs to be present in the CSV file,
+but the required fields are:
+
+- `library_id`
+- `subject_id`
+
+To trigger this operation, invoke the lambda described in `./deploy/README.md`.
+
 ### Audit Data
 
 The application is configured with [django-simple-history](https://django-simple-history.readthedocs.io/en/latest/)
diff --git a/lib/workload/stateless/stacks/metadata-manager/deploy/README.md b/lib/workload/stateless/stacks/metadata-manager/deploy/README.md
index 554400d13..502113b24 100644
--- a/lib/workload/stateless/stacks/metadata-manager/deploy/README.md
+++ b/lib/workload/stateless/stacks/metadata-manager/deploy/README.md
@@ -58,3 +58,35 @@ aws lambda invoke \
   --cli-binary-format raw-in-base64-out \
   res.json
 ```
+
+### CustomCsvLambda
+
+- Load tracking-sheet-style metadata from a CSV presigned URL
+
+To trigger the load manually, use the lambda ARN stored in the SSM Parameter Store named
+`/orcabus/metadata-manager/load-custom-csv-lambda-arn`.
+
+To query it in a local terminal:
+
+```sh
+load_custom_csv_lambda_arn=$(aws ssm get-parameter --name '/orcabus/metadata-manager/load-custom-csv-lambda-arn' --with-decryption | jq -r .Parameter.Value)
+```
+
+The lambda handler accepts a JSON payload with a single key, `url`, which is the presigned URL of the CSV file.
+
+```json
+{
+  "url": "https://example.com/csv"
+}
+```
+
+Invoking the lambda:
+
+```sh
+aws lambda invoke \
+  --function-name "$load_custom_csv_lambda_arn" \
+  --invocation-type Event \
+  --payload '{ "url": "https://the.url.csv" }' \
+  --cli-binary-format raw-in-base64-out \
+  res.json
+```
diff --git a/lib/workload/stateless/stacks/metadata-manager/deploy/construct/lambda-load-custom-csv/index.ts b/lib/workload/stateless/stacks/metadata-manager/deploy/construct/lambda-load-custom-csv/index.ts
new file mode 100644
index 000000000..46ccf74ef
--- /dev/null
+++ b/lib/workload/stateless/stacks/metadata-manager/deploy/construct/lambda-load-custom-csv/index.ts
@@ -0,0 +1,54 @@
+import path from 'path';
+import { Construct } from 'constructs';
+import { Duration } from 'aws-cdk-lib';
+import { ISecret } from 'aws-cdk-lib/aws-secretsmanager';
+import { StringParameter } from 'aws-cdk-lib/aws-ssm';
+import {
+  DockerImageFunction,
+  DockerImageFunctionProps,
+  DockerImageCode,
+} from 'aws-cdk-lib/aws-lambda';
+
+type LambdaProps = {
+  /**
+   * The basic common lambda properties that it should inherit from
+   */
+  basicLambdaConfig: Partial<DockerImageFunctionProps>;
+  /**
+   * The secret for the db connection where the lambda will need access to
+   */
+  dbConnectionSecret: ISecret;
+};
+
+export class LambdaLoadCustomCSVConstruct extends Construct {
+  private readonly lambda: DockerImageFunction;
+
+  constructor(scope: Construct, id: string, lambdaProps: LambdaProps) {
+    super(scope, id);
+
+    this.lambda = new DockerImageFunction(this, 'LoadCustomCSVLambda', {
+      environment: {
+        ...lambdaProps.basicLambdaConfig.environment,
+      },
+      securityGroups: lambdaProps.basicLambdaConfig.securityGroups,
+      vpc: lambdaProps.basicLambdaConfig.vpc,
+      vpcSubnets: lambdaProps.basicLambdaConfig.vpcSubnets,
+      architecture: lambdaProps.basicLambdaConfig.architecture,
+      code: DockerImageCode.fromImageAsset(path.join(__dirname, '../../../'), {
+        file: 'deploy/construct/lambda-load-custom-csv/lambda.Dockerfile',
+      }),
+      timeout: Duration.minutes(15),
+      memorySize: 4096,
+    });
lambdaProps.dbConnectionSecret.grantRead(this.lambda); + + // We need to store this lambda ARN somewhere so that we could refer when need to load this manually + const ssmParameter = new StringParameter(this, 'LoadCustomCSVLambdaArnParameterStore', { + parameterName: '/orcabus/metadata-manager/load-custom-csv-lambda-arn', + description: 'The ARN of the lambda that load metadata from a presigned URL CSV file', + stringValue: this.lambda.functionArn, + }); + } +} diff --git a/lib/workload/stateless/stacks/metadata-manager/deploy/construct/lambda-load-custom-csv/lambda.Dockerfile b/lib/workload/stateless/stacks/metadata-manager/deploy/construct/lambda-load-custom-csv/lambda.Dockerfile new file mode 100644 index 000000000..f880a59ed --- /dev/null +++ b/lib/workload/stateless/stacks/metadata-manager/deploy/construct/lambda-load-custom-csv/lambda.Dockerfile @@ -0,0 +1,12 @@ +FROM public.ecr.aws/lambda/python:3.12 + +WORKDIR ${LAMBDA_TASK_ROOT} + +# COPY all files +COPY . . + +# Install the specified packages +RUN pip install -r deps/requirements-full.txt + +# Specify handler +CMD [ "handler.load_custom_metadata_csv.handler" ] diff --git a/lib/workload/stateless/stacks/metadata-manager/docs/architecture.drawio.svg b/lib/workload/stateless/stacks/metadata-manager/docs/architecture.drawio.svg index 29207b691..c2fc82cb4 100644 --- a/lib/workload/stateless/stacks/metadata-manager/docs/architecture.drawio.svg +++ b/lib/workload/stateless/stacks/metadata-manager/docs/architecture.drawio.svg @@ -1,4 +1,4 @@ -
API Lambda
API Lambda
sync-ghseet
sync-ghseet
migration
migration
API Gateway
API Gateway
Scheduled Event
(PLANNED)
Scheduled Eve...
PostgresSQL Instance
(Shared with other microservices)
PostgresSQL I...
Secret Manager 
(Access db connection string)
Secret Manage...
Text is not SVG - cannot display
\ No newline at end of file +
API Lambda
API Lambda
sync-ghseet
sync-ghseet
csv-loader
csv-loader
API Gateway
API Gateway
Scheduled Event
(PROD only)
Scheduled Eve...
PostgresSQL Instance
(Shared with other microservices)
PostgresSQL I...
Secret Manager 
(Access db connection string)
Secret Manage...
Manual trigger
Manual tri...
migration
migration
Text is not SVG - cannot display
\ No newline at end of file
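
Putting the last two patches together, the CSV loading path can be exercised locally. A minimal sketch, assuming the helpers introduced above (`sanitize_lab_metadata_df`, `warn_drop_duplicated_library`, `load_metadata_csv`) and an already configured Django environment; the in-memory dataframe and its values are placeholders, and the deployed lambda instead builds the dataframe with `download_csv_to_pandas(url)` from a presigned URL:

```python
# Illustrative sketch only; the values below are placeholders, not real metadata.
import pandas as pd

from proc.service.load_csv_srv import load_metadata_csv
from proc.service.utils import sanitize_lab_metadata_df, warn_drop_duplicated_library

# Columns follow the snake_case mapping described in the README; only
# library_id and subject_id are required, other columns are optional.
df = pd.DataFrame([
    {"library_id": "L2400001", "subject_id": "EXTSBJ001", "sample_id": "PRJ240001"},
])

clean_df = warn_drop_duplicated_library(sanitize_lab_metadata_df(df))
stats = load_metadata_csv(clean_df)
print(stats)  # per-model create/update counts plus invalid_record_count
```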