From f3d2e39b3e5b05558b9f195e4d11137c664b32ba Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Tue, 13 Aug 2024 16:46:55 +1000 Subject: [PATCH 01/20] Update API paths --- .../stateless/stacks/workflow-manager/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/workload/stateless/stacks/workflow-manager/README.md b/lib/workload/stateless/stacks/workflow-manager/README.md index 86f31ac98..922edefd6 100644 --- a/lib/workload/stateless/stacks/workflow-manager/README.md +++ b/lib/workload/stateless/stacks/workflow-manager/README.md @@ -40,6 +40,7 @@ make ps ``` python manage.py help python manage.py showmigrations +python manage.py makemigrations python manage.py migrate ``` @@ -73,17 +74,17 @@ python manage.py runserver_plus ``` ``` -curl -s http://localhost:8000/wfm/v1/workflow | jq +curl -s http://localhost:8000/api/v1/workflow | jq ``` ``` -curl -s http://localhost:8000/wfm/v1/workflow/1 | jq +curl -s http://localhost:8000/api/v1/workflow/1 | jq ``` Or visit in browser: -- http://localhost:8000/wfm/v1 -- http://localhost:8000/wfm/v1/workflow -- http://localhost:8000/wfm/v1/workflow/1 +- http://localhost:8000/api/v1 +- http://localhost:8000/api/v1/workflow +- http://localhost:8000/api/v1/workflow/1 ### API Doc From 71b7ea4df5d120eebd96710d4fb7f438616528f5 Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Tue, 13 Aug 2024 16:47:41 +1000 Subject: [PATCH 02/20] Add library model --- .../workflow_manager/models/library.py | 26 +++++++++++++++++++ .../workflow_manager/models/workflow_run.py | 12 +++++++++ 2 files changed, 38 insertions(+) create mode 100644 lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py new file mode 100644 index 000000000..940f03e69 --- /dev/null +++ 
b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py @@ -0,0 +1,26 @@ +from django.core.serializers.json import DjangoJSONEncoder +from django.db import models + +from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager +from workflow_manager.models.workflow import Workflow + + +class LibraryManager(OrcaBusBaseManager): + pass + + +class Library(OrcaBusBaseModel): + id = models.BigAutoField(primary_key=True) + + library_id = models.CharField(max_length=255, unique=True) + + objects = LibraryManager() + + def __str__(self): + return f"ID: {self.id}, library_id: {self.library_id}" + + def to_dict(self): + return { + "id": self.id, + "library_id": self.library_id + } diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py index 32ba36480..768dec46f 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py @@ -3,6 +3,7 @@ from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager from workflow_manager.models.payload import Payload from workflow_manager.models.workflow import Workflow +from workflow_manager.models.library import Library class WorkflowRunManager(OrcaBusBaseManager): @@ -36,6 +37,10 @@ class Meta: # Link to workflow payload data payload = models.ForeignKey(Payload, null=True, blank=True, on_delete=models.SET_NULL) + # Link to library table + libraries = models.ManyToManyField(Library, through="LibraryAssociation") + + objects = WorkflowRunManager() def __str__(self): @@ -53,3 +58,10 @@ def to_dict(self): "payload": self.payload.to_dict() if (self.payload is not None) else None, "workflow": self.workflow.to_dict() if (self.workflow is not None) else None } + + +class LibraryAssociation(OrcaBusBaseModel): + workflow_run = 
models.ForeignKey(WorkflowRun, on_delete=models.CASCADE) + library = models.ForeignKey(Library, on_delete=models.CASCADE) + association_date = models.DateTimeField() + status = models.CharField(max_length=255) From 27096f04b675e0acd1b6569b92ad05959a90a96f Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Wed, 14 Aug 2024 11:57:45 +1000 Subject: [PATCH 03/20] Add library to model and schema --- .../workflow_manager/serializers.py | 5 +++++ .../workflow_manager/urls/base.py | 1 + .../workflow_manager/viewsets/library.py | 18 ++++++++++++++++++ .../WorkflowRunStateChange.py | 18 +++++++++++++++++- .../WorkflowRunStateChange.py | 18 +++++++++++++++++- 5 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py index e606019ba..003d410a4 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py @@ -54,3 +54,8 @@ class PayloadModelSerializer(serializers.ModelSerializer): class Meta: model = Payload fields = '__all__' + +class LibraryModelSerializer(serializers.ModelSerializer): + class Meta: + model = Library + fields = '__all__' diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py index 22902fbd8..beb14bdc2 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py @@ -14,6 +14,7 @@ router.register(r"workflow", WorkflowViewSet, basename="workflow") router.register(r"workflowrun", WorkflowRunViewSet, basename="workflowrun") router.register(r"payload", 
PayloadViewSet, basename="payload") +router.register(r"library", PayloadViewSet, basename="library") urlpatterns = [ path(f"{api_base}", include(router.urls)), diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py new file mode 100644 index 000000000..d529aa5a9 --- /dev/null +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py @@ -0,0 +1,18 @@ +from rest_framework import filters +from rest_framework.viewsets import ReadOnlyModelViewSet + +from workflow_manager.models.library import Library +from workflow_manager.pagination import StandardResultsSetPagination +from workflow_manager.serializers import LibraryModelSerializer + + +class LibraryViewSet(ReadOnlyModelViewSet): + serializer_class = LibraryModelSerializer + pagination_class = StandardResultsSetPagination + filter_backends = [filters.OrderingFilter, filters.SearchFilter] + ordering_fields = '__all__' + ordering = ['-id'] + search_fields = Library.get_base_fields() + + def get_queryset(self): + return Library.objects.get_by_keyword(**self.request.query_params) diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/WorkflowRunStateChange.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/WorkflowRunStateChange.py index a80b1943f..a334306ca 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/WorkflowRunStateChange.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/WorkflowRunStateChange.py @@ -14,6 +14,7 @@ class WorkflowRunStateChange(object): 'workflowName': 'str', 'workflowVersion': 'str', 'workflowRunName': 'str', + 'linkedLibraries': 'list[str]', 
'payload': 'Payload' } @@ -25,10 +26,11 @@ class WorkflowRunStateChange(object): 'workflowName': 'workflowName', 'workflowVersion': 'workflowVersion', 'workflowRunName': 'workflowRunName', + 'linkedLibraries': 'linkedLibraries', 'payload': 'payload' } - def __init__(self, portalRunId=None, executionId=None, timestamp=None, status=None, workflowName=None, workflowVersion=None, workflowRunName=None, payload=None): # noqa: E501 + def __init__(self, portalRunId=None, executionId=None, timestamp=None, status=None, workflowName=None, workflowVersion=None, workflowRunName=None, linkedLibraries=None, payload=None): # noqa: E501 self._portalRunId = None self._executionId = None self._timestamp = None @@ -36,6 +38,7 @@ def __init__(self, portalRunId=None, executionId=None, timestamp=None, status=No self._workflowName = None self._workflowVersion = None self._workflowRunName = None + self._linkedLibraries = None self._payload = None self.discriminator = None self.portalRunId = portalRunId @@ -45,6 +48,7 @@ def __init__(self, portalRunId=None, executionId=None, timestamp=None, status=No self.workflowName = workflowName self.workflowVersion = workflowVersion self.workflowRunName = workflowRunName + self.linkedLibraries = linkedLibraries self.payload = payload @@ -132,6 +136,18 @@ def workflowRunName(self, workflowRunName): self._workflowRunName = workflowRunName + @property + def linkedLibraries(self): + + return self._linkedLibraries + + @linkedLibraries.setter + def linkedLibraries(self, linkedLibraries): + + + self._linkedLibraries = linkedLibraries + + @property def payload(self): diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/WorkflowRunStateChange.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/WorkflowRunStateChange.py index 8037d7ec6..276da2935 100644 --- 
a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/WorkflowRunStateChange.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/WorkflowRunStateChange.py @@ -13,6 +13,7 @@ class WorkflowRunStateChange(object): 'workflowName': 'str', 'workflowVersion': 'str', 'workflowRunName': 'str', + 'linkedLibraries': 'list[str]', 'payload': 'Payload' } @@ -23,16 +24,18 @@ class WorkflowRunStateChange(object): 'workflowName': 'workflowName', 'workflowVersion': 'workflowVersion', 'workflowRunName': 'workflowRunName', + 'linkedLibraries': 'linkedLibraries', 'payload': 'payload' } - def __init__(self, portalRunId=None, timestamp=None, status=None, workflowName=None, workflowVersion=None, workflowRunName=None, payload=None): # noqa: E501 + def __init__(self, portalRunId=None, timestamp=None, status=None, workflowName=None, workflowVersion=None, workflowRunName=None, linkedLibraries=None, payload=None): # noqa: E501 self._portalRunId = None self._timestamp = None self._status = None self._workflowName = None self._workflowVersion = None self._workflowRunName = None + self._linkedLibraries = None self._payload = None self.discriminator = None self.portalRunId = portalRunId @@ -41,6 +44,7 @@ def __init__(self, portalRunId=None, timestamp=None, status=None, workflowName=N self.workflowName = workflowName self.workflowVersion = workflowVersion self.workflowRunName = workflowRunName + self.linkedLibraries = linkedLibraries self.payload = payload @@ -115,6 +119,18 @@ def workflowRunName(self, workflowRunName): self._workflowRunName = workflowRunName + + @property + def linkedLibraries(self): + + return self._linkedLibraries + + @linkedLibraries.setter + def linkedLibraries(self, linkedLibraries): + + + self._linkedLibraries = linkedLibraries + @property def payload(self): From f53a3f40222115f7520e13cb80cc89cff2a15a52 Mon Sep 17 00:00:00 2001 From: Florian 
Reisinger Date: Wed, 14 Aug 2024 12:02:49 +1000 Subject: [PATCH 04/20] Event schema update --- .../events/workflowmanager/WorkflowRunStateChange.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/schemas/events/workflowmanager/WorkflowRunStateChange.schema.json b/docs/schemas/events/workflowmanager/WorkflowRunStateChange.schema.json index e30646448..e3932acd2 100644 --- a/docs/schemas/events/workflowmanager/WorkflowRunStateChange.schema.json +++ b/docs/schemas/events/workflowmanager/WorkflowRunStateChange.schema.json @@ -119,4 +119,4 @@ } } } -} +} \ No newline at end of file From 5419f3b4b99c4ec8de596cccfb4fc4fdd83432d0 Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Thu, 15 Aug 2024 09:25:35 +1000 Subject: [PATCH 05/20] First workflowrun with lib ids test working --- .../stacks/workflow-manager/README.md | 2 +- .../commands/generate_mock_workflow_run.py | 23 ++++-- ...ibraryassociation_workflowrun_libraries.py | 42 +++++++++++ .../workflow_manager/models/__init__.py | 3 +- .../workflow_manager/models/workflow_run.py | 7 +- .../workflow_manager/serializers.py | 4 +- .../workflow_manager/tests/factories.py | 34 +++++---- .../services/create_workflow_run.py | 73 +++++++++++++------ .../tests/test_workflow_srv.py | 73 +++++++++++++++++-- 9 files changed, 206 insertions(+), 55 deletions(-) create mode 100644 lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py diff --git a/lib/workload/stateless/stacks/workflow-manager/README.md b/lib/workload/stateless/stacks/workflow-manager/README.md index 922edefd6..b64df4c92 100644 --- a/lib/workload/stateless/stacks/workflow-manager/README.md +++ b/lib/workload/stateless/stacks/workflow-manager/README.md @@ -51,7 +51,7 @@ _^^^ please make sure to run `python manage.py migrate` first! 
^^^_ #### Generate Workflow Record ``` -python manage.py help generate_mock_data +python manage.py help generate_mock_workflow_run > Generate mock Workflow data into database for local development and testing ``` diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py index 5d27a5cb4..14c2beb76 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py @@ -1,10 +1,13 @@ from django.core.management import BaseCommand from django.db.models import QuerySet +from django.utils.timezone import make_aware import json +from datetime import datetime from libumccr import libjson -from workflow_manager.models import WorkflowRun -from workflow_manager.tests.factories import WorkflowRunFactory, WorkflowFactory, PayloadFactory +from workflow_manager.models import WorkflowRun, LibraryAssociation +from workflow_manager.tests.factories import WorkflowRunFactory, WorkflowFactory, PayloadFactory, LibraryFactory + # https://docs.djangoproject.com/en/5.0/howto/custom-management-commands/ class Command(BaseCommand): @@ -14,10 +17,18 @@ def handle(self, *args, **options): wf_payload = PayloadFactory() wf_workflow = WorkflowFactory() - wf:WorkflowRun = WorkflowRunFactory( - workflow_run_name = "MockWorkflowRun", - payload = wf_payload, - workflow = wf_workflow + wf: WorkflowRun = WorkflowRunFactory( + workflow_run_name="MockWorkflowRun", + payload=wf_payload, + workflow=wf_workflow + ) + + library = LibraryFactory() + LibraryAssociation.objects.create( + workflow_run=wf, + library=library, + association_date=make_aware(datetime.now()), + status="ACTIVE", ) print(libjson.dumps(wf.to_dict())) diff --git 
a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py new file mode 100644 index 000000000..80a342d0e --- /dev/null +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py @@ -0,0 +1,42 @@ +# Generated by Django 5.1 on 2024-08-14 04:07 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('workflow_manager', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Library', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('library_id', models.CharField(max_length=255, unique=True)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='LibraryAssociation', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('association_date', models.DateTimeField()), + ('status', models.CharField(max_length=255)), + ('library', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='workflow_manager.library')), + ('workflow_run', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='workflow_manager.workflowrun')), + ], + options={ + 'abstract': False, + }, + ), + migrations.AddField( + model_name='workflowrun', + name='libraries', + field=models.ManyToManyField(through='workflow_manager.LibraryAssociation', to='workflow_manager.library'), + ), + ] diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py index ce81e5ceb..a1e68b280 100644 --- 
a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py @@ -2,4 +2,5 @@ from .workflow import Workflow from .payload import Payload -from .workflow_run import WorkflowRun +from .workflow_run import WorkflowRun, LibraryAssociation +from .library import Library diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py index 768dec46f..baba1278f 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py @@ -40,7 +40,6 @@ class Meta: # Link to library table libraries = models.ManyToManyField(Library, through="LibraryAssociation") - objects = WorkflowRunManager() def __str__(self): @@ -60,8 +59,14 @@ def to_dict(self): } +class LibraryAssociationManager(OrcaBusBaseManager): + pass + + class LibraryAssociation(OrcaBusBaseModel): workflow_run = models.ForeignKey(WorkflowRun, on_delete=models.CASCADE) library = models.ForeignKey(Library, on_delete=models.CASCADE) association_date = models.DateTimeField() status = models.CharField(max_length=255) + + objects = LibraryAssociationManager() diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py index 003d410a4..aaa783fde 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py @@ -3,9 +3,7 @@ from rest_framework import serializers from rest_framework.fields import empty -from workflow_manager.models.workflow import Workflow -from workflow_manager.models.workflow_run import WorkflowRun -from 
workflow_manager.models.payload import Payload +from workflow_manager.models import Workflow, WorkflowRun, Payload, Library READ_ONLY_SERIALIZER = "READ ONLY SERIALIZER" diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py index b5d0a94d6..916b0ccbe 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py @@ -6,12 +6,10 @@ import factory from django.utils.timezone import make_aware -from workflow_manager.models.workflow import Workflow -from workflow_manager.models.workflow_run import WorkflowRun -from workflow_manager.models.payload import Payload +from workflow_manager.models import Workflow, WorkflowRun, Payload, Library -class TestConstant(Enum): +class TestConstant(Enum): workflow_name = "TestWorkflow1" payload = { "key": "value", @@ -19,25 +17,26 @@ class TestConstant(Enum): "bar": datetime.now().astimezone(ZoneInfo('Australia/Sydney')), "sub": {"my": "sub"} } + library_id = "L000001" class WorkflowFactory(factory.django.DjangoModelFactory): class Meta: model = Workflow - workflow_name = "TestWorkflow" - workflow_version = "1.0" - execution_engine_pipeline_id = str(uuid.uuid4()) - execution_engine = "ICAv2" - approval_state = "NATA" + workflow_name = "TestWorkflow" + workflow_version = "1.0" + execution_engine_pipeline_id = str(uuid.uuid4()) + execution_engine = "ICAv2" + approval_state = "NATA" class PayloadFactory(factory.django.DjangoModelFactory): class Meta: model = Payload - version = "1.0.0" - payload_ref_id = str(uuid.uuid4()) + version = "1.0.0" + payload_ref_id = str(uuid.uuid4()) data = TestConstant.payload.value @@ -46,10 +45,10 @@ class Meta: model = WorkflowRun _uid = str(uuid.uuid4()) - portal_run_id = f"20240130{_uid[:8]}" - execution_id = _uid - workflow_run_name = 
f"TestWorkflowRun{_uid[:8]}" - status = "READY" + portal_run_id = f"20240130{_uid[:8]}" + execution_id = _uid + workflow_run_name = f"TestWorkflowRun{_uid[:8]}" + status = "READY" comment = "Lorem Ipsum" timestamp = make_aware(datetime.now()) # If required, set later @@ -57,3 +56,8 @@ class Meta: workflow = None +class LibraryFactory(factory.django.DjangoModelFactory): + class Meta: + model = Library + + library_id = TestConstant.library_id.value diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py index 9eb441154..76abca99c 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py @@ -4,9 +4,23 @@ # # --- keep ^^^ at top of the module import uuid +from datetime import datetime + from django.db import transaction -from workflow_manager_proc.domain.executionservice.workflowrunstatechange import WorkflowRunStateChange, Marshaller -from workflow_manager.models.workflow_run import WorkflowRun, Workflow, Payload +from django.utils.timezone import make_aware +from workflow_manager_proc.domain.executionservice.workflowrunstatechange import ( + WorkflowRunStateChange, + Marshaller, +) +from workflow_manager.models.workflow_run import ( + WorkflowRun, + Workflow, + Payload, + Library, + LibraryAssociation, +) + +ASSOCIATION_STATUS = "ACTIVE" @transaction.atomic @@ -17,45 +31,45 @@ def handler(event, context): print(f"Processing {event}, {context}") wrsc: WorkflowRunStateChange = Marshaller.unmarshall(event, WorkflowRunStateChange) + print(wrsc) - # We expect: a corresponding Workflow has to exist for each workflow run + # We expect: a corresponding Workflow has to exist for each workflow run # TODO: decide whether we allow dynamic workflow creation or 
expect them to exist and fail if not try: print(f"Looking for workflow ({wrsc.workflowName}:{wrsc.workflowVersion}).") workflow: Workflow = Workflow.objects.get( - workflow_name = wrsc.workflowName, - workflow_version = wrsc.workflowVersion + workflow_name=wrsc.workflowName, workflow_version=wrsc.workflowVersion ) except Exception: print("No workflow found! Creating new entry.") workflow = Workflow( - workflow_name = wrsc.workflowName, - workflow_version = wrsc.workflowVersion, - execution_engine = "Unknown", - execution_engine_pipeline_id = "Unknown", - approval_state = "RESEARCH" + workflow_name=wrsc.workflowName, + workflow_version=wrsc.workflowVersion, + execution_engine="Unknown", + execution_engine_pipeline_id="Unknown", + approval_state="RESEARCH", ) print("Persisting Workflow record.") workflow.save() # then create the actual workflow run state change entry wfr = WorkflowRun( - workflow = workflow, - portal_run_id = wrsc.portalRunId, - execution_id = wrsc.executionId, # the execution service WRSC does carry the execution ID - workflow_run_name = wrsc.workflowRunName, - status = wrsc.status, - comment = None, - timestamp = wrsc.timestamp - ) + workflow=workflow, + portal_run_id=wrsc.portalRunId, + execution_id=wrsc.executionId, # the execution service WRSC does carry the execution ID + workflow_run_name=wrsc.workflowRunName, + status=wrsc.status, + comment=None, + timestamp=wrsc.timestamp, + ) # if payload is not null, create a new payload entry and assign a unique reference ID for it input_payload: Payload = wrsc.payload if input_payload: pld = Payload( - payload_ref_id = str(uuid.uuid4()), - version = input_payload.version, - data = input_payload.data + payload_ref_id=str(uuid.uuid4()), + version=input_payload.version, + data=input_payload.data, ) print("Persisting Payload record.") pld.save() @@ -65,5 +79,22 @@ def handler(event, context): print("Persisting WorkflowRun record.") wfr.save() + # if the workflow run is linked to library record(s), create the 
association(s) + input_libraries: list[str] = wrsc.linkedLibraries + for input_lib in input_libraries: + # check if the library has already a DB record + db_lib: Library = Library.objects.get_by_keyword(library_id=input_lib) + # create it if not + if not db_lib: + db_lib = Library.objects.create(library_id=input_lib) + + # create the library association + LibraryAssociation.objects.create( + workflow_run=wfr, + library=db_lib, + association_date=make_aware(datetime.now()), + status=ASSOCIATION_STATUS, + ) + print(f"{__name__} done.") return wfr # FIXME: serialise in future (json.dumps) diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py index c17b47026..a7c8a5407 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py @@ -6,17 +6,76 @@ class WorkflowSrvUnitTests(WorkflowManagerProcUnitTestCase): - @skip + # @skip def test_get_workflow_from_db(self): """ python manage.py test workflow_manager_proc.tests.test_workflow_srv.WorkflowSrvUnitTests.test_get_workflow_from_db """ - # TODO: implement - mock_wfl = Workflow() - mock_wfl.text = "Test Workflow" - mock_wfl.save() - test_wfl = create_workflow_run.handler() + test_event = { + "portalRunId": "202405012397gatc", + "executionId": "icav2.id.12345", + "timestamp": "2025-05-01T09:25:44Z", + "status": "SUCCEEDED", + "workflowName": "ctTSO500", + "workflowVersion": "4.2.7", + "workflowRunName": "ctTSO500-L000002", + "payload": { + "version": "0.1.0", + "data": { + "projectId": "bxxxxxxxx-dxxx-4xxxx-adcc-xxxxxxxxx", + "analysisId": "12345678-238c-4200-b632-d5dd8c8db94a", + "userReference": "540424_A01001_0193_BBBBMMDRX5_c754de_bd822f", + "timeCreated": "2024-05-01T10:11:35Z", + "timeModified": "2024-05-01T11:24:29Z", + 
"pipelineId": "bfffffff-cb27-4dfa-846e-acd6eb081aca", + "pipelineCode": "CTTSO500 v4_2_7", + "pipelineDescription": "This is an ctTSO500 workflow execution", + "pipelineUrn": "urn:ilmn:ica:pipeline:bfffffff-cb27-4dfa-846e-acd6eb081aca#CTTSO500_v4_2_7" + } + } + } + + test_wfl = create_workflow_run.handler(test_event, None) + logger.info(test_wfl) + self.assertIsNotNone(test_wfl) + self.assertEqual("ctTSO500-L000002", test_wfl.workflow_run_name) + + def test_get_workflow_from_db2(self): + """ + python manage.py test workflow_manager_proc.tests.test_workflow_srv.WorkflowSrvUnitTests.test_get_workflow_from_db2 + """ + lib_ids = ["L000001", "L000002"] + + test_event = { + "portalRunId": "202405012397gatc", + "executionId": "icav2.id.12345", + "timestamp": "2025-05-01T09:25:44Z", + "status": "SUCCEEDED", + "workflowName": "ctTSO500", + "workflowVersion": "4.2.7", + "workflowRunName": "ctTSO500-L000002", + "linkedLibraries": lib_ids, + "payload": { + "version": "0.1.0", + "data": { + "projectId": "bxxxxxxxx-dxxx-4xxxx-adcc-xxxxxxxxx", + "analysisId": "12345678-238c-4200-b632-d5dd8c8db94a", + "userReference": "540424_A01001_0193_BBBBMMDRX5_c754de_bd822f", + "timeCreated": "2024-05-01T10:11:35Z", + "timeModified": "2024-05-01T11:24:29Z", + "pipelineId": "bfffffff-cb27-4dfa-846e-acd6eb081aca", + "pipelineCode": "CTTSO500 v4_2_7", + "pipelineDescription": "This is an ctTSO500 workflow execution", + "pipelineUrn": "urn:ilmn:ica:pipeline:bfffffff-cb27-4dfa-846e-acd6eb081aca#CTTSO500_v4_2_7" + } + } + } + + test_wfl = create_workflow_run.handler(test_event, None) logger.info(test_wfl) self.assertIsNotNone(test_wfl) - self.assertIn("Workflow", test_wfl.portal_run_id) + self.assertEqual("ctTSO500-L000002", test_wfl.workflow_run_name) + libs = test_wfl.libraries.all() + for lib in libs: + self.assertIn(lib.library_id, lib_ids) From 7161c49202fca08a14a041edbd6c499928e3ae18 Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Thu, 15 Aug 2024 09:57:37 +1000 Subject: [PATCH 06/20] 
Add .gitattributes --- .gitattributes | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..937c0eb37 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +/.yarn/releases/** binary +/.yarn/plugins/** binary From 3373929efbc23c33975b661fb7b71e94bdede6c0 Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Mon, 19 Aug 2024 11:34:50 +1000 Subject: [PATCH 07/20] WRSC schema update --- ...ibraryassociation_workflowrun_libraries.py | 42 --------- .../workflowrunstatechange/LibraryRecord.py | 92 +++++++++++++++++++ .../WorkflowRunStateChange.py | 2 +- .../workflowrunstatechange/__init__.py | 1 + .../workflowrunstatechange/LibraryRecord.py | 92 +++++++++++++++++++ .../WorkflowRunStateChange.py | 4 +- .../workflowrunstatechange/__init__.py | 1 + 7 files changed, 189 insertions(+), 45 deletions(-) delete mode 100644 lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py create mode 100644 lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/LibraryRecord.py create mode 100644 lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/LibraryRecord.py diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py deleted file mode 100644 index 80a342d0e..000000000 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py +++ /dev/null @@ -1,42 +0,0 @@ -# Generated by Django 5.1 on 2024-08-14 04:07 - -import django.db.models.deletion -from django.db import migrations, models - - -class 
Migration(migrations.Migration): - - dependencies = [ - ('workflow_manager', '0001_initial'), - ] - - operations = [ - migrations.CreateModel( - name='Library', - fields=[ - ('id', models.BigAutoField(primary_key=True, serialize=False)), - ('library_id', models.CharField(max_length=255, unique=True)), - ], - options={ - 'abstract': False, - }, - ), - migrations.CreateModel( - name='LibraryAssociation', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('association_date', models.DateTimeField()), - ('status', models.CharField(max_length=255)), - ('library', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='workflow_manager.library')), - ('workflow_run', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='workflow_manager.workflowrun')), - ], - options={ - 'abstract': False, - }, - ), - migrations.AddField( - model_name='workflowrun', - name='libraries', - field=models.ManyToManyField(through='workflow_manager.LibraryAssociation', to='workflow_manager.library'), - ), - ] diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/LibraryRecord.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/LibraryRecord.py new file mode 100644 index 000000000..db8e1dee7 --- /dev/null +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/LibraryRecord.py @@ -0,0 +1,92 @@ +# coding: utf-8 +import pprint +import re # noqa: F401 + +import six +from enum import Enum + +class LibraryRecord(object): + + + _types = { + 'libraryId': 'str', + 'orcabusId': 'str' + } + + _attribute_map = { + 'libraryId': 'libraryId', + 'orcabusId': 'orcabusId' + } + + def __init__(self, libraryId=None, orcabusId=None): # noqa: E501 + self._libraryId = None + self._orcabusId = None + self.discriminator = 
None + self.libraryId = libraryId + self.orcabusId = orcabusId + + + @property + def libraryId(self): + + return self._libraryId + + @libraryId.setter + def libraryId(self, libraryId): + + + self._libraryId = libraryId + + + @property + def orcabusId(self): + + return self._orcabusId + + @orcabusId.setter + def orcabusId(self, orcabusId): + + + self._orcabusId = orcabusId + + def to_dict(self): + result = {} + + for attr, _ in six.iteritems(self._types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(LibraryRecord, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + return pprint.pformat(self.to_dict()) + + def __repr__(self): + return self.to_str() + + def __eq__(self, other): + if not isinstance(other, LibraryRecord): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not self == other + diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/WorkflowRunStateChange.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/WorkflowRunStateChange.py index a334306ca..8a3807708 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/WorkflowRunStateChange.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/WorkflowRunStateChange.py @@ -14,7 +14,7 @@ class WorkflowRunStateChange(object): 'workflowName': 'str', 
'workflowVersion': 'str', 'workflowRunName': 'str', - 'linkedLibraries': 'list[str]', + 'linkedLibraries': 'list[LibraryRecord]', 'payload': 'Payload' } diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/__init__.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/__init__.py index 394eb38ee..bef47cdab 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/__init__.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/executionservice/workflowrunstatechange/__init__.py @@ -4,5 +4,6 @@ from .marshaller import Marshaller from .AWSEvent import AWSEvent +from .LibraryRecord import LibraryRecord from .Payload import Payload from .WorkflowRunStateChange import WorkflowRunStateChange diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/LibraryRecord.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/LibraryRecord.py new file mode 100644 index 000000000..db8e1dee7 --- /dev/null +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/LibraryRecord.py @@ -0,0 +1,92 @@ +# coding: utf-8 +import pprint +import re # noqa: F401 + +import six +from enum import Enum + +class LibraryRecord(object): + + + _types = { + 'libraryId': 'str', + 'orcabusId': 'str' + } + + _attribute_map = { + 'libraryId': 'libraryId', + 'orcabusId': 'orcabusId' + } + + def __init__(self, libraryId=None, orcabusId=None): # noqa: E501 + self._libraryId = None + self._orcabusId = None + self.discriminator = None + self.libraryId = libraryId + self.orcabusId = orcabusId + + + @property + def libraryId(self): + + return self._libraryId + + @libraryId.setter + 
def libraryId(self, libraryId): + + + self._libraryId = libraryId + + + @property + def orcabusId(self): + + return self._orcabusId + + @orcabusId.setter + def orcabusId(self, orcabusId): + + + self._orcabusId = orcabusId + + def to_dict(self): + result = {} + + for attr, _ in six.iteritems(self._types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(LibraryRecord, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + return pprint.pformat(self.to_dict()) + + def __repr__(self): + return self.to_str() + + def __eq__(self, other): + if not isinstance(other, LibraryRecord): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not self == other + diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/WorkflowRunStateChange.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/WorkflowRunStateChange.py index 276da2935..b0a2b545d 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/WorkflowRunStateChange.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/WorkflowRunStateChange.py @@ -13,7 +13,7 @@ class WorkflowRunStateChange(object): 'workflowName': 'str', 'workflowVersion': 'str', 'workflowRunName': 'str', - 'linkedLibraries': 'list[str]', + 'linkedLibraries': 'list[LibraryRecord]', 'payload': 'Payload' } @@ 
-119,7 +119,7 @@ def workflowRunName(self, workflowRunName): self._workflowRunName = workflowRunName - + @property def linkedLibraries(self): diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/__init__.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/__init__.py index 394eb38ee..bef47cdab 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/__init__.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/domain/workflowmanager/workflowrunstatechange/__init__.py @@ -4,5 +4,6 @@ from .marshaller import Marshaller from .AWSEvent import AWSEvent +from .LibraryRecord import LibraryRecord from .Payload import Payload from .WorkflowRunStateChange import WorkflowRunStateChange From 5a947117041f388e6b11edd9ebf85d699a39f8c4 Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Tue, 20 Aug 2024 10:49:26 +1000 Subject: [PATCH 08/20] Update linked library model to contain lab and metadata IDs * added library endpoint * adopt metadata library id for internal model * fix generator / test --- .../commands/generate_mock_workflow_run.py | 32 +++++++++++--- ...ibraryassociation_workflowrun_libraries.py | 42 +++++++++++++++++++ .../workflow_manager/models/library.py | 8 ++-- .../workflow_manager/tests/factories.py | 8 +++- .../workflow_manager/urls/base.py | 3 +- .../workflow_manager/viewsets/library.py | 2 +- .../workflow_manager/viewsets/workflow_run.py | 1 + .../services/create_workflow_run.py | 32 +++++++------- .../services/get_workflow_run.py | 4 +- .../tests/test_workflow_srv.py | 15 ++++++- 10 files changed, 116 insertions(+), 31 deletions(-) create mode 100644 lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py diff --git 
a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py index 14c2beb76..ee8dcf680 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py @@ -5,9 +5,11 @@ import json from datetime import datetime from libumccr import libjson -from workflow_manager.models import WorkflowRun, LibraryAssociation +from workflow_manager.models import Workflow, WorkflowRun, LibraryAssociation from workflow_manager.tests.factories import WorkflowRunFactory, WorkflowFactory, PayloadFactory, LibraryFactory +WORKFLOW_NAME = "TestWorkflow" + # https://docs.djangoproject.com/en/5.0/howto/custom-management-commands/ class Command(BaseCommand): @@ -15,21 +17,41 @@ class Command(BaseCommand): def handle(self, *args, **options): wf_payload = PayloadFactory() - wf_workflow = WorkflowFactory() + qs: QuerySet = Workflow.objects.filter(workflow_name=WORKFLOW_NAME) + + if qs.exists(): + print("Mock data found, Skipping creation.") + return + wf = WorkflowFactory(workflow_name=WORKFLOW_NAME) - wf: WorkflowRun = WorkflowRunFactory( + wfr: WorkflowRun = WorkflowRunFactory( workflow_run_name="MockWorkflowRun", + portal_run_id="1234", payload=wf_payload, - workflow=wf_workflow + workflow=wf ) library = LibraryFactory() LibraryAssociation.objects.create( - workflow_run=wf, + workflow_run=wfr, library=library, association_date=make_aware(datetime.now()), status="ACTIVE", ) + wfr2: WorkflowRun = WorkflowRunFactory( + workflow_run_name="MockWorkflowRun2", + portal_run_id="1235", + payload=wf_payload, + workflow=wf + ) + library2 = LibraryFactory(orcabus_id="lib.01J5M2JFE1JPYV62RYQEG99CP5", library_id="L000002") + LibraryAssociation.objects.create( + 
workflow_run=wfr2, + library=library2, + association_date=make_aware(datetime.now()), + status="ACTIVE", + ) + print(libjson.dumps(wf.to_dict())) print("Done") diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py new file mode 100644 index 000000000..1d7e891f3 --- /dev/null +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py @@ -0,0 +1,42 @@ +# Generated by Django 5.1 on 2024-08-19 01:52 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('workflow_manager', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Library', + fields=[ + ('orcabus_id', models.CharField(max_length=255, primary_key=True, serialize=False)), + ('library_id', models.CharField(max_length=255)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='LibraryAssociation', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('association_date', models.DateTimeField()), + ('status', models.CharField(max_length=255)), + ('library', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='workflow_manager.library')), + ('workflow_run', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='workflow_manager.workflowrun')), + ], + options={ + 'abstract': False, + }, + ), + migrations.AddField( + model_name='workflowrun', + name='libraries', + field=models.ManyToManyField(through='workflow_manager.LibraryAssociation', to='workflow_manager.library'), + ), + ] diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py 
b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py index 940f03e69..fc57ac4dc 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py @@ -10,17 +10,17 @@ class LibraryManager(OrcaBusBaseManager): class Library(OrcaBusBaseModel): - id = models.BigAutoField(primary_key=True) - library_id = models.CharField(max_length=255, unique=True) + orcabus_id = models.CharField(primary_key=True, max_length=255) + library_id = models.CharField(max_length=255) objects = LibraryManager() def __str__(self): - return f"ID: {self.id}, library_id: {self.library_id}" + return f"orcabus_id: {self.orcabus_id}, library_id: {self.library_id}" def to_dict(self): return { - "id": self.id, + "orcabus_id": self.orcabus_id, "library_id": self.library_id } diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py index 916b0ccbe..a530cddaf 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py @@ -16,8 +16,11 @@ class TestConstant(Enum): "foo": uuid.uuid4(), "bar": datetime.now().astimezone(ZoneInfo('Australia/Sydney')), "sub": {"my": "sub"} + }, + library = { + "library_id": "L000001", + "orcabus_id": "lib.01J5M2J44HFJ9424G7074NKTGN" } - library_id = "L000001" class WorkflowFactory(factory.django.DjangoModelFactory): @@ -60,4 +63,5 @@ class LibraryFactory(factory.django.DjangoModelFactory): class Meta: model = Library - library_id = TestConstant.library_id.value + library_id = TestConstant.library.value["library_id"] + orcabus_id = TestConstant.library.value["orcabus_id"] diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py 
b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py index beb14bdc2..e642cc8e2 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py @@ -4,6 +4,7 @@ from workflow_manager.viewsets.workflow import WorkflowViewSet from workflow_manager.viewsets.workflow_run import WorkflowRunViewSet from workflow_manager.viewsets.payload import PayloadViewSet +from workflow_manager.viewsets.library import LibraryViewSet from workflow_manager.settings.base import API_VERSION api_namespace = "api" @@ -14,7 +15,7 @@ router.register(r"workflow", WorkflowViewSet, basename="workflow") router.register(r"workflowrun", WorkflowRunViewSet, basename="workflowrun") router.register(r"payload", PayloadViewSet, basename="payload") -router.register(r"library", PayloadViewSet, basename="library") +router.register(r"library", LibraryViewSet, basename="library") urlpatterns = [ path(f"{api_base}", include(router.urls)), diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py index d529aa5a9..bf2d5f150 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py @@ -11,7 +11,7 @@ class LibraryViewSet(ReadOnlyModelViewSet): pagination_class = StandardResultsSetPagination filter_backends = [filters.OrderingFilter, filters.SearchFilter] ordering_fields = '__all__' - ordering = ['-id'] + ordering = ['-orcabus_id'] search_fields = Library.get_base_fields() def get_queryset(self): diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/workflow_run.py index 842abc907..baac9093b 100644 --- 
a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/workflow_run.py @@ -15,4 +15,5 @@ class WorkflowRunViewSet(ReadOnlyModelViewSet): search_fields = WorkflowRun.get_base_fields() def get_queryset(self): + print(self.request.query_params) return WorkflowRun.objects.get_by_keyword(**self.request.query_params) diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py index 76abca99c..dc0295d8c 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py @@ -10,6 +10,7 @@ from django.utils.timezone import make_aware from workflow_manager_proc.domain.executionservice.workflowrunstatechange import ( WorkflowRunStateChange, + LibraryRecord, Marshaller, ) from workflow_manager.models.workflow_run import ( @@ -80,21 +81,24 @@ def handler(event, context): wfr.save() # if the workflow run is linked to library record(s), create the association(s) - input_libraries: list[str] = wrsc.linkedLibraries - for input_lib in input_libraries: - # check if the library has already a DB record - db_lib: Library = Library.objects.get_by_keyword(library_id=input_lib) - # create it if not - if not db_lib: - db_lib = Library.objects.create(library_id=input_lib) + input_libraries: list[LibraryRecord] = wrsc.linkedLibraries + if input_libraries: + for input_rec in input_libraries: + # check if the library has already a DB record + db_lib: Library = Library.objects.get_by_keyword(orcabus_id=input_rec.orcabusId) + # create it if not + if not db_lib: + # TODO: the library record should exist in the future - synced with metadata service on + # LibraryStateChange events 
+ db_lib = Library.objects.create(orcabus_id=input_rec.orcabusId, library_id=input_rec.libraryId) - # create the library association - LibraryAssociation.objects.create( - workflow_run=wfr, - library=db_lib, - association_date=make_aware(datetime.now()), - status=ASSOCIATION_STATUS, - ) + # create the library association + LibraryAssociation.objects.create( + workflow_run=wfr, + library=db_lib, + association_date=make_aware(datetime.now()), + status=ASSOCIATION_STATUS, + ) print(f"{__name__} done.") return wfr # FIXME: serialise in future (json.dumps) diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/get_workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/get_workflow_run.py index 13ac0a716..42a6d5f68 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/get_workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/get_workflow_run.py @@ -24,11 +24,11 @@ def handler(event, context): # time_window = event.get('time_window', None) # FIXME: make configurable later? 
qs = WorkflowRun.objects.filter( - portal_run_id = portal_run_id + portal_run_id=portal_run_id ) if status: qs = qs.filter( - status = status + status=status ) if timestamp: dt = datetime.datetime.fromisoformat(str(timestamp)) diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py index a7c8a5407..829c1b0fb 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py @@ -45,7 +45,17 @@ def test_get_workflow_from_db2(self): """ python manage.py test workflow_manager_proc.tests.test_workflow_srv.WorkflowSrvUnitTests.test_get_workflow_from_db2 """ - lib_ids = ["L000001", "L000002"] + library_ids = ["L000001", "L000002"] + lib_ids = [ + { + "libraryId": library_ids[0], + "orcabusId": "lib.01J5M2J44HFJ9424G7074NKTGN" + }, + { + "libraryId": library_ids[1], + "orcabusId": "lib.01J5M2JFE1JPYV62RYQEG99CP5" + } + ] test_event = { "portalRunId": "202405012397gatc", @@ -78,4 +88,5 @@ def test_get_workflow_from_db2(self): self.assertEqual("ctTSO500-L000002", test_wfl.workflow_run_name) libs = test_wfl.libraries.all() for lib in libs: - self.assertIn(lib.library_id, lib_ids) + logger.info(lib) + self.assertIn(lib.library_id, library_ids) From 24a4c158ed0e7f8c471047c2292e16b1f111ac32 Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Tue, 20 Aug 2024 14:10:36 +1000 Subject: [PATCH 09/20] Remove library endpoint --- .../stacks/workflow-manager/workflow_manager/urls/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py index e642cc8e2..ce9d7ca47 100644 --- 
a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py @@ -15,7 +15,7 @@ router.register(r"workflow", WorkflowViewSet, basename="workflow") router.register(r"workflowrun", WorkflowRunViewSet, basename="workflowrun") router.register(r"payload", PayloadViewSet, basename="payload") -router.register(r"library", LibraryViewSet, basename="library") +# router.register(r"library", LibraryViewSet, basename="library") urlpatterns = [ path(f"{api_base}", include(router.urls)), From 437742e08191ff2d43daab9f25a7396c3943a8ef Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Tue, 20 Aug 2024 16:50:16 +1000 Subject: [PATCH 10/20] Fix newline --- .../events/workflowmanager/WorkflowRunStateChange.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/schemas/events/workflowmanager/WorkflowRunStateChange.schema.json b/docs/schemas/events/workflowmanager/WorkflowRunStateChange.schema.json index e3932acd2..e30646448 100644 --- a/docs/schemas/events/workflowmanager/WorkflowRunStateChange.schema.json +++ b/docs/schemas/events/workflowmanager/WorkflowRunStateChange.schema.json @@ -119,4 +119,4 @@ } } } -} \ No newline at end of file +} From e12f0a8cf9eee78553b8fd193e4a168f08e7050b Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Tue, 20 Aug 2024 18:00:12 +1000 Subject: [PATCH 11/20] Refactor service methods according to model changes --- .../workflow_manager/models/__init__.py | 1 + .../workflow_manager/models/state.py | 41 +++++++++++++ .../workflow_manager/models/workflow_run.py | 22 +++---- .../lambdas/handle_service_wrsc_event.py | 57 +++++++++++-------- .../services/__init__.py | 28 +++++++++ .../services/create_workflow_run.py | 48 ++++++++-------- .../services/get_workflow_run.py | 49 ---------------- 7 files changed, 135 insertions(+), 111 deletions(-) create mode 100644 
lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/state.py delete mode 100644 lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/get_workflow_run.py diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py index a1e68b280..81f4f3d24 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py @@ -4,3 +4,4 @@ from .payload import Payload from .workflow_run import WorkflowRun, LibraryAssociation from .library import Library +from .state import State \ No newline at end of file diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/state.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/state.py new file mode 100644 index 000000000..76f927ae1 --- /dev/null +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/state.py @@ -0,0 +1,41 @@ +from django.db import models + +from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager +from workflow_manager.models import WorkflowRun, Payload + + +class StateManager(OrcaBusBaseManager): + pass + + +class State(OrcaBusBaseModel): + class Meta: + unique_together = ["workflow_run", "status", "timestamp"] + + id = models.BigAutoField(primary_key=True) + + # --- mandatory fields + workflow_run = models.ForeignKey(WorkflowRun, on_delete=models.CASCADE) + status = models.CharField(max_length=255) + timestamp = models.DateTimeField() + + comment = models.CharField(max_length=255, null=True, blank=True) + + # Link to workflow run payload data + payload = models.ForeignKey(Payload, null=True, blank=True, on_delete=models.SET_NULL) + + objects = StateManager() + + def __str__(self): + return f"ID: {self.id}, status: {self.status}" + + def to_dict(self): 
+ return { + "id": self.id, + "workflow_run_id": self.workflow_run.id, + "status": self.status, + "timestamp": str(self.timestamp), + "comment": self.comment, + "payload": self.payload.to_dict() if (self.payload is not None) else None, + } + diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py index baba1278f..26fc64a59 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py @@ -1,9 +1,7 @@ from django.db import models from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager -from workflow_manager.models.payload import Payload -from workflow_manager.models.workflow import Workflow -from workflow_manager.models.library import Library +from workflow_manager.models import Workflow, Library, State class WorkflowRunManager(OrcaBusBaseManager): @@ -11,16 +9,15 @@ class WorkflowRunManager(OrcaBusBaseManager): class WorkflowRun(OrcaBusBaseModel): - class Meta: - unique_together = ["portal_run_id", "status", "timestamp"] id = models.BigAutoField(primary_key=True) # --- mandatory fields - portal_run_id = models.CharField(max_length=255) - status = models.CharField(max_length=255) - timestamp = models.DateTimeField() + portal_run_id = models.CharField(max_length=255, unique=True) + current_status = models.CharField(max_length=255) + created = models.DateTimeField() + last_modified = models.DateTimeField() # --- optional fields @@ -34,9 +31,6 @@ class Meta: # Link to workflow table workflow = models.ForeignKey(Workflow, null=True, blank=True, on_delete=models.SET_NULL) - # Link to workflow payload data - payload = models.ForeignKey(Payload, null=True, blank=True, on_delete=models.SET_NULL) - # Link to library table libraries = models.ManyToManyField(Library, through="LibraryAssociation") 
@@ -49,12 +43,12 @@ def to_dict(self): return { "id": self.id, "portal_run_id": self.portal_run_id, - "status": self.status, - "timestamp": str(self.timestamp), + "current_status": self.current_status, + "created": str(self.created), + "last_modified": str(self.last_modified), "execution_id": self.execution_id, "workflow_run_name": self.workflow_run_name, "comment": self.comment, - "payload": self.payload.to_dict() if (self.payload is not None) else None, "workflow": self.workflow.to_dict() if (self.workflow is not None) else None } diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/lambdas/handle_service_wrsc_event.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/lambdas/handle_service_wrsc_event.py index e19b1c120..cba142950 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/lambdas/handle_service_wrsc_event.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/lambdas/handle_service_wrsc_event.py @@ -3,10 +3,14 @@ django.setup() # --- keep ^^^ at top of the module -from workflow_manager.models.workflow_run import WorkflowRun +import datetime +from workflow_manager.models import WorkflowRun, State import workflow_manager_proc.domain.executionservice.workflowrunstatechange as srv import workflow_manager_proc.domain.workflowmanager.workflowrunstatechange as wfm -from workflow_manager_proc.services import get_workflow_run, create_workflow_run, emit_workflow_run_state_change +from workflow_manager_proc.services import create_workflow_run, emit_workflow_run_state_change, \ + create_workflow_run_state + +default_time_window = datetime.timedelta(hours=1) def handler(event, context): @@ -16,49 +20,56 @@ def handler(event, context): input_event: srv.AWSEvent = srv.Marshaller.unmarshall(event, srv.AWSEvent) input_wrsc: srv.WorkflowRunStateChange = input_event.detail - query = { - "portal_run_id": input_wrsc.portalRunId, - "status": input_wrsc.status, - 
"timestamp": input_wrsc.timestamp - } - print(f"Finding WorkflowRun records for query:{query}") - wrsc_matches = get_workflow_run.handler(query, None) # FIXME: may only need to be a "exist" query + print(f"Finding WorkflowRun records for portal_run_id:{input_wrsc.portalRunId}") + try: + wfr: WorkflowRun = WorkflowRun.objects.get(portal_run_id=input_wrsc.portalRunId) + except Exception: + wfr: WorkflowRun = create_workflow_run.handler(srv.Marshaller.marshall(input_wrsc), None) + + state_matches = State.objects.filter(workflow_run=wfr) + if input_wrsc.status: + state_matches = state_matches.filter(status=input_wrsc.status) + if input_wrsc.timestamp: + dt = datetime.datetime.fromisoformat(str(input_wrsc.timestamp)) + start_t = dt - default_time_window + end_t = dt + default_time_window + state_matches = state_matches.filter(timestamp__range=(start_t, end_t)) - # check workflow run list - if len(wrsc_matches) == 0: - print(f"No matching WorkflowRun found. Creating...") - # create new entry - db_wfr: WorkflowRun = create_workflow_run.handler(srv.Marshaller.marshall(input_wrsc), None) + # check state list + if len(state_matches) == 0: + print(f"No matching WorkflowRun State found. Creating...") + # create new state entry + wfr_state: State = create_workflow_run_state(wrsc=input_wrsc, wfr=wfr) # create outgoing event - out_event = map_db_record_to_wrsc(db_wfr) + out_event = map_db_record_to_wrsc(wfr, wfr_state) # emit state change print("Emitting WRSC.") emit_workflow_run_state_change.handler(wfm.Marshaller.marshall(out_event), None) else: # ignore - status already exists - print(f"WorkflowRun already exists. Nothing to do.") + print(f"WorkflowRun state already exists. 
Nothing to do.") print(f"{__name__} done.") -def map_db_record_to_wrsc(db_record: WorkflowRun) -> wfm.WorkflowRunStateChange: +def map_db_record_to_wrsc(db_record: WorkflowRun, state: State) -> wfm.WorkflowRunStateChange: wrsc = wfm.WorkflowRunStateChange( portalRunId=db_record.portal_run_id, - timestamp=db_record.timestamp, - status=db_record.status, + timestamp=state.timestamp, + status=state.status, workflowName=db_record.workflow.workflow_name, workflowVersion=db_record.workflow.workflow_version, workflowRunName=db_record.workflow_run_name, ) # handle condition: Payload is optional - if db_record.payload: + if state.payload: wrsc.payload = wfm.Payload( - refId=db_record.payload.payload_ref_id, - version=db_record.payload.version, - data=db_record.payload.data + refId=state.payload.payload_ref_id, + version=state.payload.version, + data=state.payload.data ) return wrsc diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/__init__.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/__init__.py index e69de29bb..2775986e0 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/__init__.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/__init__.py @@ -0,0 +1,28 @@ +import uuid +from workflow_manager_proc.domain.executionservice.workflowrunstatechange import WorkflowRunStateChange +from workflow_manager.models import WorkflowRun, State, Payload + + +def create_workflow_run_state(wrsc: WorkflowRunStateChange, wfr: WorkflowRun): + input_payload: Payload = wrsc.payload + pld = None + if input_payload: + pld: Payload = Payload( + payload_ref_id=str(uuid.uuid4()), + version=input_payload.version, + data=input_payload.data, + ) + print("Persisting Payload record.") + pld.save() + + # create state for the workflow run + workflow_state: State = State( + workflow_run=wfr, + status=wrsc.status, + timestamp=wrsc.timestamp, + comment=None, 
+ payload=pld + ) + workflow_state.save() + + return workflow_state diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py index dc0295d8c..5310f7472 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py @@ -13,13 +13,15 @@ LibraryRecord, Marshaller, ) -from workflow_manager.models.workflow_run import ( +from workflow_manager.models import ( WorkflowRun, Workflow, + State, Payload, Library, LibraryAssociation, ) +from . import create_workflow_run_state ASSOCIATION_STATUS = "ACTIVE" @@ -53,33 +55,29 @@ def handler(event, context): print("Persisting Workflow record.") workflow.save() - # then create the actual workflow run state change entry - wfr = WorkflowRun( - workflow=workflow, - portal_run_id=wrsc.portalRunId, - execution_id=wrsc.executionId, # the execution service WRSC does carry the execution ID - workflow_run_name=wrsc.workflowRunName, - status=wrsc.status, - comment=None, - timestamp=wrsc.timestamp, - ) - - # if payload is not null, create a new payload entry and assign a unique reference ID for it - input_payload: Payload = wrsc.payload - if input_payload: - pld = Payload( - payload_ref_id=str(uuid.uuid4()), - version=input_payload.version, - data=input_payload.data, + # then create the actual workflow run entry if it does not exist + try: + wfr: WorkflowRun = WorkflowRun.objects.get(portal_run_id=wrsc.portalRunId) + wfr.current_status = wrsc.status + wfr.last_modified = wrsc.timestamp + except Exception: + print("No workflow found! 
Creating new entry.") + wfr = WorkflowRun( + workflow=workflow, + portal_run_id=wrsc.portalRunId, + execution_id=wrsc.executionId, # the execution service WRSC does carry the execution ID + workflow_run_name=wrsc.workflowRunName, + current_status=wrsc.status, + comment=None, + last_modified=wrsc.timestamp, + created=wrsc.timestamp ) - print("Persisting Payload record.") - pld.save() - - wfr.payload = pld # Note: payload type depend on workflow + status and will carry a version in it - - print("Persisting WorkflowRun record.") + print("Persisting Workflow record.") wfr.save() + # create the related state & payload entries for the WRSC + create_workflow_run_state(wrsc=wrsc, wfr=wfr) + # if the workflow run is linked to library record(s), create the association(s) input_libraries: list[LibraryRecord] = wrsc.linkedLibraries if input_libraries: diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/get_workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/get_workflow_run.py deleted file mode 100644 index 42a6d5f68..000000000 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/get_workflow_run.py +++ /dev/null @@ -1,49 +0,0 @@ -# import django - -# django.setup() - -# # --- keep ^^^ at top of the module -import datetime -from workflow_manager.models.workflow_run import WorkflowRun - -default_time_window = datetime.timedelta(hours=1) - -def handler(event, context): - """event will be - { - portal_run_id: "", - status: "", # optional - timestamp: "" # optional - time_window: "" # currenty not used, defaults 1h - } - """ - print(f"Processing get_workflow_run with: {event}, {context}") - portal_run_id = event['portal_run_id'] - status = event.get('status', None) - timestamp = event.get('timestamp', None) - # time_window = event.get('time_window', None) # FIXME: make configurable later? 
- - qs = WorkflowRun.objects.filter( - portal_run_id=portal_run_id - ) - if status: - qs = qs.filter( - status=status - ) - if timestamp: - dt = datetime.datetime.fromisoformat(str(timestamp)) - print(f"Filter for time window around: {str(timestamp)}") - start_t = dt - default_time_window - end_t = dt + default_time_window - print(f"Time window from {start_t} to {end_t}.") - qs = qs.filter( - timestamp__range=(start_t, end_t) - ) - - workflow_runs = [] - for w in qs.all(): - workflow_runs.append(w) - print(w.to_dict()) - print(f"Found {len(workflow_runs)} WorkflowRun records.") - - return workflow_runs # FIXME: need to deserialise in future From 22c8c6259fef91955157f9f60eb0b7d0f659f830 Mon Sep 17 00:00:00 2001 From: William Putra Intan <61998484+williamputraintan@users.noreply.github.com> Date: Wed, 21 Aug 2024 09:38:13 +1000 Subject: [PATCH 12/20] metadata-manager: model refactoring (#508) --- .../stacks/metadata-manager/Dockerfile | 2 +- .../stacks/metadata-manager/Makefile | 3 + .../stacks/metadata-manager/README.md | 43 ++++++---- .../app/migrations/0001_initial.py | 86 +++++++++---------- ...ter_historicallibrary_coverage_and_more.py | 53 ------------ ...cts_historicalspecimen_subject_and_more.py | 31 ------- .../metadata-manager/app/models/base.py | 17 ++++ .../app/models/lab/individual.py | 8 ++ .../app/models/lab/library.py | 19 +++- .../app/models/lab/specimen.py | 11 ++- .../app/models/lab/subject.py | 10 ++- .../metadata-manager/app/settings/local.py | 5 ++ .../metadata-manager/app/tests/factories.py | 18 ++-- .../metadata-manager/app/tests/test_models.py | 31 ++++--- .../app/tests/test_viewsets.py | 24 +++--- .../metadata-manager/app/viewsets/lab.py | 59 +++++++++---- .../deps/requirements-slim.txt | 1 + .../metadata-manager/docs/schema.drawio.svg | 2 +- .../proc/service/tracking_sheet_srv.py | 37 ++++---- .../proc/tests/test_tracking_sheet_srv.py | 40 +++++---- 20 files changed, 258 insertions(+), 242 deletions(-) delete mode 100644 
lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_alter_historicallibrary_coverage_and_more.py delete mode 100644 lib/workload/stateless/stacks/metadata-manager/app/migrations/0003_remove_specimen_subjects_historicalspecimen_subject_and_more.py diff --git a/lib/workload/stateless/stacks/metadata-manager/Dockerfile b/lib/workload/stateless/stacks/metadata-manager/Dockerfile index 0b2a88d46..d0716ae56 100644 --- a/lib/workload/stateless/stacks/metadata-manager/Dockerfile +++ b/lib/workload/stateless/stacks/metadata-manager/Dockerfile @@ -10,4 +10,4 @@ COPY . . RUN make install -ENTRYPOINT ["make", "start"] +ENTRYPOINT ["make", "docker-start"] diff --git a/lib/workload/stateless/stacks/metadata-manager/Makefile b/lib/workload/stateless/stacks/metadata-manager/Makefile index 43a3f8f3d..e74d81833 100644 --- a/lib/workload/stateless/stacks/metadata-manager/Makefile +++ b/lib/workload/stateless/stacks/metadata-manager/Makefile @@ -57,6 +57,9 @@ s3-load: s3-dump-download-if-not-exists db-load-data # application start: migrate + @python manage.py runserver + +docker-start: migrate @python manage.py runserver 0.0.0.0:8000 stop: down diff --git a/lib/workload/stateless/stacks/metadata-manager/README.md b/lib/workload/stateless/stacks/metadata-manager/README.md index 37f1a0be8..77242c0e0 100644 --- a/lib/workload/stateless/stacks/metadata-manager/README.md +++ b/lib/workload/stateless/stacks/metadata-manager/README.md @@ -20,20 +20,20 @@ export ORCABUS_TOKEN=$(aws secretsmanager get-secret-value --secret-id orcabus/t The API currently supports the following paths: -- https://metadata.[STAGE].umccr.org/library -- https://metadata.[STAGE].umccr.org/specimen -- https://metadata.[STAGE].umccr.org/subject +- https://metadata.[STAGE].umccr.org/api/v1/library +- https://metadata.[STAGE].umccr.org/api/v1/specimen +- https://metadata.[STAGE].umccr.org/api/v1/subject Stage means the environment where the API is deployed, it could be `dev`, `stg`, or `prod` (or omit this 
for prod). An example of how to use a curl command to access the production API: ```sh -curl -s -H "Authorization: Bearer $ORCABUS_TOKEN" "https://metadata.umccr.org/library" | jq +curl -s -H "Authorization: Bearer $ORCABUS_TOKEN" "https://metadata.umccr.org/api/v1/library" | jq ``` Filtering of results is also supported by the API. For example, to filter by `internal_id`, append the query parameter -to the URL: `.../library?internal_id=LIB001` +to the URL: `.../library?library_id=LIB001` ## Schema @@ -43,6 +43,12 @@ This is the current (WIP) schema that reflects the current implementation. To modify the diagram, open the `docs/schema.drawio.svg` with [diagrams.net](https://app.diagrams.net/?src=about). +`orcabus_id` is the unique identifier for each record in the database. It is generated by the application where the first 3 characters are the model prefix followed by [ULID](https://pypi.org/project/ulid-py/) separated by a dot (.). +The prefix is as follows: +- Library model are `lib` +- Specimen model are `spc` +- Subject model are `sbj` + ## How things work ### How Syncing The Data Works @@ -51,18 +57,21 @@ In the near future, we might introduce different ways to load data into the appl loading data from the Google tracking sheet and mapping it to its respective model as follows. 
-| Sheet Header | Table | Field Name | -|--------------|------------|------------| -| SubjectID | `Subject` | internalId | -| SampleID | `Specimen` | internalId | -| Source | `Specimen` | source | -| LibraryID | `Library` | internalId | -| Phenotype | `Library` | phenotype | -| Workflow | `Library` | workflow | -| Quality | `Library` | quality | -| Type | `Library` | type | -| Coverage (X) | `Library` | coverage | -| Assay | `Library` | assay | +| Sheet Header | Table | Field Name | +|--------------|------------|---------------| +| SubjectID | `Subject` | subject_id | +| SampleID | `Specimen` | sample_id | +| Source | `Specimen` | source | +| LibraryID | `Library` | library_id | +| Phenotype | `Library` | phenotype | +| Workflow | `Library` | workflow | +| Quality | `Library` | quality | +| Type | `Library` | type | +| Coverage (X) | `Library` | coverage | +| Assay | `Library` | assay | +| ProjectOwner | `Library` | project_owner | +| ProjectName | `Library` | project_name | + Some important notes of the sync: diff --git a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py index 027462b36..6337867eb 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0001_initial.py @@ -1,5 +1,6 @@ -# Generated by Django 5.0 on 2024-04-06 02:53 +# Generated by Django 5.1 on 2024-08-20 07:08 +import django.core.validators import django.db.models.deletion import simple_history.models from django.conf import settings @@ -18,9 +19,9 @@ class Migration(migrations.Migration): migrations.CreateModel( name='Specimen', fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('internal_id', models.CharField(blank=True, null=True, unique=True)), - ('source', models.CharField(blank=True, choices=[('ascites', 'Ascites'), 
('blood', 'Blood'), ('bone-marrow', 'Bone Marrow'), ('buccal', 'Buccal'), ('cell-line', 'Cellline'), ('cfDNA', 'Cfdna'), ('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True)), + ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('specimen_id', models.CharField(blank=True, null=True, unique=True)), + ('source', models.CharField(blank=True, choices=[('ascites', 'Ascites'), ('blood', 'Blood'), ('bone-marrow', 'BoneMarrow'), ('buccal', 'Buccal'), ('cell-line', 'Cell_line'), ('cfDNA', 'Cfdna'), ('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True)), ], options={ 'abstract': False, @@ -29,38 +30,18 @@ class Migration(migrations.Migration): migrations.CreateModel( name='Subject', fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('internal_id', models.CharField(blank=True, null=True, unique=True)), + ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('subject_id', models.CharField(blank=True, 
null=True, unique=True)), ], options={ 'abstract': False, }, ), - migrations.CreateModel( - name='HistoricalSpecimen', - fields=[ - ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), - ('internal_id', models.CharField(blank=True, db_index=True, null=True)), - ('source', models.CharField(blank=True, choices=[('ascites', 'Ascites'), ('blood', 'Blood'), ('bone-marrow', 'Bone Marrow'), ('buccal', 'Buccal'), ('cell-line', 'Cellline'), ('cfDNA', 'Cfdna'), ('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True)), - ('history_id', models.AutoField(primary_key=True, serialize=False)), - ('history_date', models.DateTimeField(db_index=True)), - ('history_change_reason', models.CharField(max_length=100, null=True)), - ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), - ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), - ], - options={ - 'verbose_name': 'historical specimen', - 'verbose_name_plural': 'historical specimens', - 'ordering': ('-history_date', '-history_id'), - 'get_latest_by': ('history_date', 'history_id'), - }, - bases=(simple_history.models.HistoricalChanges, models.Model), - ), migrations.CreateModel( name='HistoricalSubject', fields=[ - ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), - ('internal_id', models.CharField(blank=True, db_index=True, null=True)), + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a 
dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('subject_id', models.CharField(blank=True, db_index=True, null=True)), ('history_id', models.AutoField(primary_key=True, serialize=False)), ('history_date', models.DateTimeField(db_index=True)), ('history_change_reason', models.CharField(max_length=100, null=True)), @@ -78,14 +59,16 @@ class Migration(migrations.Migration): migrations.CreateModel( name='Library', fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('internal_id', models.CharField(blank=True, null=True, unique=True)), + ('orcabus_id', models.CharField(editable=False, primary_key=True, serialize=False, unique=True, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('library_id', models.CharField(blank=True, null=True, unique=True)), ('phenotype', models.CharField(blank=True, choices=[('normal', 'Normal'), ('tumor', 'Tumor'), ('negative-control', 'Negative Control')], null=True)), ('workflow', models.CharField(blank=True, choices=[('clinical', 'Clinical'), ('research', 'Research'), ('qc', 'Qc'), ('control', 'Control'), ('bcl', 'Bcl'), ('manual', 'Manual')], null=True)), - ('quality', models.CharField(blank=True, choices=[('very-poor', 'Very Poor'), ('poor', 'Poor'), ('good', 'Good'), ('borderline', 'Borderline')], null=True)), - ('type', models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('other', 'Other'), ('TSO-DNA', 'Tso Dna'), ('TSO-RNA', 'Tso Rna'), ('WGS', 'Wgs'), ('WTS', 'Wts')], null=True)), + ('quality', models.CharField(blank=True, choices=[('very-poor', 'VeryPoor'), ('poor', 'Poor'), ('good', 'Good'), ('borderline', 'Borderline')], 
null=True)), + ('type', models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('TSO-DNA', 'TSO_DNA'), ('TSO-RNA', 'TSO_RNA'), ('WGS', 'Wgs'), ('WTS', 'Wts'), ('other', 'Other')], null=True)), ('assay', models.CharField(blank=True, null=True)), - ('coverage', models.DecimalField(blank=True, decimal_places=2, max_digits=10, null=True)), + ('coverage', models.FloatField(blank=True, null=True)), + ('project_owner', models.CharField(blank=True, null=True)), + ('project_name', models.CharField(blank=True, null=True)), ('specimen', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.specimen')), ], options={ @@ -95,14 +78,16 @@ class Migration(migrations.Migration): migrations.CreateModel( name='HistoricalLibrary', fields=[ - ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), - ('internal_id', models.CharField(blank=True, db_index=True, null=True)), + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('library_id', models.CharField(blank=True, db_index=True, null=True)), ('phenotype', models.CharField(blank=True, choices=[('normal', 'Normal'), ('tumor', 'Tumor'), ('negative-control', 'Negative Control')], null=True)), ('workflow', models.CharField(blank=True, choices=[('clinical', 'Clinical'), ('research', 'Research'), ('qc', 'Qc'), ('control', 'Control'), ('bcl', 'Bcl'), ('manual', 'Manual')], null=True)), - ('quality', models.CharField(blank=True, choices=[('very-poor', 'Very Poor'), ('poor', 'Poor'), ('good', 'Good'), ('borderline', 'Borderline')], null=True)), - ('type', 
models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('other', 'Other'), ('TSO-DNA', 'Tso Dna'), ('TSO-RNA', 'Tso Rna'), ('WGS', 'Wgs'), ('WTS', 'Wts')], null=True)), + ('quality', models.CharField(blank=True, choices=[('very-poor', 'VeryPoor'), ('poor', 'Poor'), ('good', 'Good'), ('borderline', 'Borderline')], null=True)), + ('type', models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('TSO-DNA', 'TSO_DNA'), ('TSO-RNA', 'TSO_RNA'), ('WGS', 'Wgs'), ('WTS', 'Wts'), ('other', 'Other')], null=True)), ('assay', models.CharField(blank=True, null=True)), - ('coverage', models.DecimalField(blank=True, decimal_places=2, max_digits=10, null=True)), + ('coverage', models.FloatField(blank=True, null=True)), + ('project_owner', models.CharField(blank=True, null=True)), + ('project_name', models.CharField(blank=True, null=True)), ('history_id', models.AutoField(primary_key=True, serialize=False)), ('history_date', models.DateTimeField(db_index=True)), ('history_change_reason', models.CharField(max_length=100, null=True)), @@ -120,20 +105,27 @@ class Migration(migrations.Migration): ), migrations.AddField( model_name='specimen', - name='subjects', - field=models.ManyToManyField(blank=True, to='app.subject'), + name='subject', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.subject'), ), migrations.CreateModel( - name='HistoricalSpecimen_subjects', + name='HistoricalSpecimen', fields=[ - ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), - ('m2m_history_id', models.AutoField(primary_key=True, serialize=False)), - ('history', 
models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='app.historicalspecimen')), - ('specimen', models.ForeignKey(blank=True, db_constraint=False, db_tablespace='', null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.specimen')), - ('subject', models.ForeignKey(blank=True, db_constraint=False, db_tablespace='', null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.subject')), + ('orcabus_id', models.CharField(db_index=True, editable=False, validators=[django.core.validators.RegexValidator(code='invalid_orcabus_id', message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', regex='^[\\w]{3}\\.[\\w]{26}$')])), + ('specimen_id', models.CharField(blank=True, db_index=True, null=True)), + ('source', models.CharField(blank=True, choices=[('ascites', 'Ascites'), ('blood', 'Blood'), ('bone-marrow', 'BoneMarrow'), ('buccal', 'Buccal'), ('cell-line', 'Cell_line'), ('cfDNA', 'Cfdna'), ('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ('subject', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.subject')), ], options={ - 'verbose_name': 
'HistoricalSpecimen_subjects', + 'verbose_name': 'historical specimen', + 'verbose_name_plural': 'historical specimens', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), }, bases=(simple_history.models.HistoricalChanges, models.Model), ), diff --git a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_alter_historicallibrary_coverage_and_more.py b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_alter_historicallibrary_coverage_and_more.py deleted file mode 100644 index a30fd3e74..000000000 --- a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0002_alter_historicallibrary_coverage_and_more.py +++ /dev/null @@ -1,53 +0,0 @@ -# Generated by Django 5.0.3 on 2024-06-19 04:13 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('app', '0001_initial'), - ] - - operations = [ - migrations.AlterField( - model_name='historicallibrary', - name='coverage', - field=models.FloatField(blank=True, null=True), - ), - migrations.AlterField( - model_name='historicallibrary', - name='quality', - field=models.CharField(blank=True, choices=[('very-poor', 'VeryPoor'), ('poor', 'Poor'), ('good', 'Good'), ('borderline', 'Borderline')], null=True), - ), - migrations.AlterField( - model_name='historicallibrary', - name='type', - field=models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('TSO-DNA', 'TSO_DNA'), ('TSO-RNA', 'TSO_RNA'), ('WGS', 'Wgs'), ('WTS', 'Wts'), ('other', 'Other')], null=True), - ), - migrations.AlterField( - model_name='historicalspecimen', - name='source', - field=models.CharField(blank=True, choices=[('ascites', 'Ascites'), ('blood', 'Blood'), ('bone-marrow', 'BoneMarrow'), ('buccal', 'Buccal'), ('cell-line', 'Cell_line'), ('cfDNA', 'Cfdna'), 
('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True), - ), - migrations.AlterField( - model_name='library', - name='coverage', - field=models.FloatField(blank=True, null=True), - ), - migrations.AlterField( - model_name='library', - name='quality', - field=models.CharField(blank=True, choices=[('very-poor', 'VeryPoor'), ('poor', 'Poor'), ('good', 'Good'), ('borderline', 'Borderline')], null=True), - ), - migrations.AlterField( - model_name='library', - name='type', - field=models.CharField(blank=True, choices=[('10X', 'Ten X'), ('BiModal', 'Bimodal'), ('ctDNA', 'Ct Dna'), ('ctTSO', 'Ct Tso'), ('exome', 'Exome'), ('MeDIP', 'Me Dip'), ('Metagenm', 'Metagenm'), ('MethylSeq', 'Methyl Seq'), ('TSO-DNA', 'TSO_DNA'), ('TSO-RNA', 'TSO_RNA'), ('WGS', 'Wgs'), ('WTS', 'Wts'), ('other', 'Other')], null=True), - ), - migrations.AlterField( - model_name='specimen', - name='source', - field=models.CharField(blank=True, choices=[('ascites', 'Ascites'), ('blood', 'Blood'), ('bone-marrow', 'BoneMarrow'), ('buccal', 'Buccal'), ('cell-line', 'Cell_line'), ('cfDNA', 'Cfdna'), ('cyst-fluid', 'Cyst Fluid'), ('DNA', 'Dna'), ('eyebrow-hair', 'Eyebrow Hair'), ('FFPE', 'Ffpe'), ('FNA', 'Fna'), ('OCT', 'Oct'), ('organoid', 'Organoid'), ('PDX-tissue', 'Pdx Tissue'), ('plasma-serum', 'Plasma Serum'), ('RNA', 'Rna'), ('tissue', 'Tissue'), ('skin', 'Skin'), ('water', 'Water')], null=True), - ), - ] diff --git a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0003_remove_specimen_subjects_historicalspecimen_subject_and_more.py b/lib/workload/stateless/stacks/metadata-manager/app/migrations/0003_remove_specimen_subjects_historicalspecimen_subject_and_more.py deleted file mode 100644 index 6b4022380..000000000 --- 
a/lib/workload/stateless/stacks/metadata-manager/app/migrations/0003_remove_specimen_subjects_historicalspecimen_subject_and_more.py +++ /dev/null @@ -1,31 +0,0 @@ -# Generated by Django 5.0.3 on 2024-07-11 02:43 - -import django.db.models.deletion -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('app', '0002_alter_historicallibrary_coverage_and_more'), - ] - - operations = [ - migrations.RemoveField( - model_name='specimen', - name='subjects', - ), - migrations.AddField( - model_name='historicalspecimen', - name='subject', - field=models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='app.subject'), - ), - migrations.AddField( - model_name='specimen', - name='subject', - field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='app.subject'), - ), - migrations.DeleteModel( - name='HistoricalSpecimen_subjects', - ), - ] diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/base.py b/lib/workload/stateless/stacks/metadata-manager/app/models/base.py index a6b03bce6..10327fb84 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/base.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/base.py @@ -1,9 +1,11 @@ import logging import operator +import ulid from functools import reduce from typing import List from django.core.exceptions import FieldError +from django.core.validators import RegexValidator from django.db import models from django.db.models import ( Q, @@ -82,6 +84,21 @@ class BaseModel(models.Model): class Meta: abstract = True + orcabus_id = models.CharField( + primary_key=True, + unique=True, + editable=False, + blank=False, + null=False, + validators=[ + RegexValidator( + regex=r'^[\w]{3}\.[\w]{26}$', + message='orcabus_id must start with a 3-character prefix, followed by a dot separator and a ULID', + code='invalid_orcabus_id' + )] 
+ + ) + def save(self, *args, **kwargs): self.full_clean() return super(BaseModel, self).save(*args, **kwargs) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/individual.py b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/individual.py index c6d3b4de5..7638f60ce 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/individual.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/individual.py @@ -1,5 +1,7 @@ import logging +import ulid +from django.core.validators import RegexValidator from django.db import models from django.db.models import QuerySet from simple_history.models import HistoricalRecords @@ -12,6 +14,7 @@ class IndividualManager(BaseManager): class Individual(BaseModel): + orcabus_id_prefix = 'idv' objects = IndividualManager() internal_id = models.CharField( @@ -21,3 +24,8 @@ class Individual(BaseModel): ) history = HistoricalRecords() + + def save(self, *args, **kwargs): + if not self.orcabus_id: + self.orcabus_id = self.orcabus_id_prefix + '.' 
+ ulid.new().str + super().save(*args, **kwargs) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/library.py b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/library.py index aba20ae21..8caa24861 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/library.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/library.py @@ -1,6 +1,8 @@ import logging +import ulid from django.db import models +from django.core.validators import RegexValidator from simple_history.models import HistoricalRecords from app.models.base import BaseManager, BaseModel @@ -53,9 +55,11 @@ class LibraryManager(BaseManager): class Library(BaseModel): + orcabus_id_prefix = 'lib' + objects = LibraryManager() - internal_id = models.CharField( + library_id = models.CharField( unique=True, blank=True, null=True @@ -88,6 +92,19 @@ class Library(BaseModel): blank=True, null=True ) + project_owner = models.CharField( + blank=True, + null=True + ) + project_name = models.CharField( + blank=True, + null=True + ) specimen = models.ForeignKey(Specimen, on_delete=models.SET_NULL, blank=True, null=True) history = HistoricalRecords() + + def save(self, *args, **kwargs): + if not self.orcabus_id: + self.orcabus_id = self.orcabus_id_prefix + '.' 
+ ulid.new().str + super().save(*args, **kwargs) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/specimen.py b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/specimen.py index d22a6dbbd..c45f2a47d 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/specimen.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/specimen.py @@ -1,5 +1,7 @@ import logging +import ulid +from django.core.validators import RegexValidator from django.db import models from django.db.models import QuerySet from simple_history.models import HistoricalRecords @@ -35,9 +37,11 @@ class SpecimenManager(BaseManager): class Specimen(BaseModel): + orcabus_id_prefix = 'spc' + objects = SpecimenManager() - internal_id = models.CharField( + specimen_id = models.CharField( unique=True, blank=True, null=True @@ -46,3 +50,8 @@ class Specimen(BaseModel): subject = models.ForeignKey(Subject, on_delete=models.SET_NULL, blank=True, null=True) history = HistoricalRecords() + + def save(self, *args, **kwargs): + if not self.orcabus_id: + self.orcabus_id = self.orcabus_id_prefix + '.' 
+ ulid.new().str + super().save(*args, **kwargs) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/subject.py b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/subject.py index 235de6e5f..d29627593 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/models/lab/subject.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/models/lab/subject.py @@ -1,4 +1,6 @@ +import ulid from django.db import models +from django.core.validators import RegexValidator from simple_history.models import HistoricalRecords from app.models.base import BaseModel, BaseManager @@ -9,11 +11,17 @@ class SubjectManager(BaseManager): class Subject(BaseModel): + orcabus_id_prefix = 'sbj' objects = SubjectManager() - internal_id = models.CharField( + subject_id = models.CharField( unique=True, blank=True, null=True ) history = HistoricalRecords() + + def save(self, *args, **kwargs): + if not self.orcabus_id: + self.orcabus_id = self.orcabus_id_prefix + '.' + ulid.new().str + super().save(*args, **kwargs) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/settings/local.py b/lib/workload/stateless/stacks/metadata-manager/app/settings/local.py index 9d673351e..e9d141b6d 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/settings/local.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/settings/local.py @@ -56,6 +56,11 @@ "description": "Terms of service", "url": "https://umccr.org/", }, + 'CAMELIZE_NAMES': False, + 'POSTPROCESSING_HOOKS': [ + 'drf_spectacular.contrib.djangorestframework_camel_case.camelize_serializer_fields', + 'drf_spectacular.hooks.postprocess_schema_enums' + ], } REDOC_SETTINGS = { diff --git a/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py b/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py index be68660d1..ced5e26d4 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py +++ 
b/lib/workload/stateless/stacks/metadata-manager/app/tests/factories.py @@ -3,27 +3,29 @@ from app.models import Subject, Specimen, Library INDIVIDUAL_1 = { - "internal_id": "I001" + "individual_id": "I001" } SUBJECT_1 = { - "internal_id": "SBJ001", + "subject_id": "SBJ001", "externalId": "EXTSUBIDA" } SPECIMEN_1 = { - "internal_id": "PRJ001", + "specimen_id": "PRJ001", "source": "FFPE" } LIBRARY_1 = { - "internal_id": "LIB01", + "library_id": "LIB01", "phenotype": "negative-control", "workflow": "clinical", "quality": "good", "type": "WTS", "assay": "NebRNA", - "coverage": 6.0 + "coverage": 6.0, + 'project_owner': 'test_owner', + 'project_name': 'test_project' } @@ -31,14 +33,14 @@ class SubjectFactory(factory.django.DjangoModelFactory): class Meta: model = Subject - internal_id = SUBJECT_1['internal_id'] + subject_id = SUBJECT_1['subject_id'] class SpecimenFactory(factory.django.DjangoModelFactory): class Meta: model = Specimen - internal_id = SPECIMEN_1['internal_id'] + specimen_id = SPECIMEN_1['specimen_id'] source = SPECIMEN_1['source'] @@ -46,7 +48,7 @@ class LibraryFactory(factory.django.DjangoModelFactory): class Meta: model = Library - internal_id = LIBRARY_1["internal_id"] + library_id = LIBRARY_1["library_id"] phenotype = LIBRARY_1["phenotype"] workflow = LIBRARY_1["workflow"] quality = LIBRARY_1["quality"] diff --git a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py index 133a5831a..46204c4b0 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_models.py @@ -1,6 +1,7 @@ import logging from django.test import TestCase +import ulid from app.models import Subject, Specimen, Library @@ -11,28 +12,30 @@ class MetadataTestCase(TestCase): def setUp(self): subject = Subject.objects.create( - internal_id='SBJ001', + orcabus_id=f'sbj.{ulid.new().str}', + 
subject_id='SBJ001', ) subject.full_clean() subject.save() specimen = Specimen.objects.create( - internal_id='SPC001', + specimen_id='SPC001', subject=subject, ) specimen.full_clean() specimen.save() library = Library.objects.create( - id=1, - internal_id='L001', + library_id='L001', phenotype='negative-control', workflow='clinical', quality='poor', type='WTS', assay='NebRNA', coverage='6.3', - specimen=specimen + specimen=specimen, + project_name='test_project', + project_owner='test_owner', ) library.full_clean() library.save() @@ -43,14 +46,14 @@ def test_get_simple_model(self): """ logger.info("Test get on simple lab models") - lib_one = Library.objects.get(internal_id="L001") - self.assertEqual(lib_one.internal_id, "L001", "incorrect 'id' from given internal library id") + lib_one = Library.objects.get(library_id="L001") + self.assertEqual(lib_one.library_id, "L001", "incorrect 'id' from given internal library id") - spc_one = Specimen.objects.get(internal_id="SPC001") - self.assertEqual(spc_one.internal_id, "SPC001", "incorrect 'id' from given internal specimen id") + spc_one = Specimen.objects.get(specimen_id="SPC001") + self.assertEqual(spc_one.specimen_id, "SPC001", "incorrect 'id' from given internal specimen id") - sub_one = Subject.objects.get(internal_id="SBJ001") - self.assertEqual(sub_one.internal_id, "SBJ001", "incorrect 'id' from given internal subject id") + sub_one = Subject.objects.get(subject_id="SBJ001") + self.assertEqual(sub_one.subject_id, "SBJ001", "incorrect 'id' from given internal subject id") def test_metadata_model_relationship(self): """ @@ -58,12 +61,12 @@ def test_metadata_model_relationship(self): """ logger.info("Test the relationship model within the lab metadata") - lib_one = Library.objects.get(internal_id="L001") + lib_one = Library.objects.get(library_id="L001") # find the linked specimen spc_one = lib_one.specimen - self.assertEqual(spc_one.internal_id, "SPC001", "incorrect specimen 'id' should linked to library") + 
self.assertEqual(spc_one.specimen_id, "SPC001", "incorrect specimen 'id' should linked to library") # find the linked subject sub_one = spc_one.subject - self.assertEqual(sub_one.internal_id, "SBJ001", "incorrect subject 'id' linked to specimen") + self.assertEqual(sub_one.subject_id, "SBJ001", "incorrect subject 'id' linked to specimen") diff --git a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py index e82260998..f302f468b 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/tests/test_viewsets.py @@ -26,7 +26,7 @@ def test_get_api(self): model_to_check = [ { "path": "library", - "props": LIBRARY_1 + "props": LIBRARY_1, }, { "path": "specimen", @@ -39,7 +39,8 @@ def test_get_api(self): ] for model in model_to_check: - path = version_endpoint(model['path']) + path_id = model['path'] + path = version_endpoint(path_id) logger.info(f"check API path for '{path}'") response = self.client.get(f"/{path}/") @@ -48,16 +49,15 @@ def test_get_api(self): result_response = response.data["results"] self.assertGreater(len(result_response), 0, "A result is expected") - logger.debug("Check if unique data has a single entry") - response = self.client.get(f"/{path}/?internal_id={model['props']['internal_id']}") + response = self.client.get(f"/{path}/?{path_id}_id={model['props'][f'{path_id}_id']}") results_response = response.data["results"] self.assertEqual( len(results_response), 1, "Single result is expected for unique data" ) logger.debug("Check if wrong parameter") - response = self.client.get(f"/{path}/?internal_id=ERROR") + response = self.client.get(f"/{path}/?{path}_id=ERROR") results_response = response.data["results"] self.assertEqual( len(results_response), @@ -80,15 +80,15 @@ def test_library_full_model_api(self): self.assertGreater(len(result_response), 0, "A result is expected") 
logger.debug("Check if unique data has a single entry") - response = self.client.get(f"/{path}/?internal_id={LIBRARY_1['internal_id']}") + response = self.client.get(f"/{path}/?library_id={LIBRARY_1['library_id']}") results_response = response.data["results"] self.assertEqual( len(results_response), 1, "Single result is expected for unique data" ) logger.debug("check if specimen and library are linked") - self.assertEqual(result_response[0]['specimen']['internal_id'], SPECIMEN_1["internal_id"], ) - self.assertEqual(result_response[0]['specimen']['subject']['internal_id'], SUBJECT_1["internal_id"], ) + self.assertEqual(result_response[0]['specimen']['specimen_id'], SPECIMEN_1["specimen_id"], ) + self.assertEqual(result_response[0]['specimen']['subject']['subject_id'], SUBJECT_1["subject_id"], ) def test_subject_full_model_api(self): """ @@ -105,13 +105,13 @@ def test_subject_full_model_api(self): self.assertGreater(len(result_response), 0, "A result is expected") logger.debug("Check if unique data has a single entry") - response = self.client.get(f"/{path}/?internal_id={SUBJECT_1['internal_id']}") + response = self.client.get(f"/{path}/?subject_id={SUBJECT_1['subject_id']}") results_response = response.data["results"] self.assertEqual( len(results_response), 1, "Single result is expected for unique data" ) logger.debug("check if specimen and library are linked") - self.assertEqual(result_response[0]['specimen_set'][0]['internal_id'], SPECIMEN_1["internal_id"], ) - self.assertEqual(result_response[0]['specimen_set'][0]['library_set'][0]['internal_id'], - LIBRARY_1["internal_id"], ) + self.assertEqual(result_response[0]['specimen_set'][0]['specimen_id'], SPECIMEN_1["specimen_id"], ) + self.assertEqual(result_response[0]['specimen_set'][0]['library_set'][0]['library_id'], + LIBRARY_1["library_id"], ) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py index 
0868df990..f9dfd2ffa 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py @@ -15,16 +15,25 @@ class SubjectViewSet(ReadOnlyModelViewSet): pagination_class = StandardResultsSetPagination filter_backends = [filters.OrderingFilter, filters.SearchFilter] ordering_fields = "__all__" - ordering = ["-id"] + ordering = ["-subject_id"] search_fields = Subject.get_base_fields() + queryset = Subject.objects.none() + + @extend_schema(parameters=[ + SubjectSerializer + ]) + def list(self, request, *args, **kwargs): + return super().list(request, *args, **kwargs) def get_queryset(self): return Subject.objects.get_by_keyword(**self.request.query_params) - @extend_schema(responses={200: SubjectFullSerializer(many=True)}, + @extend_schema(operation_id='/api/v1/subject/full/', + responses={200: SubjectFullSerializer(many=True)}, parameters=[ - OpenApiParameter(name='library_internal_id', - description='Filter the subjects that contain this particular internal_id in ' + SubjectSerializer, + OpenApiParameter(name='libraryId', + description='Filter the subjects that has the given library_id in ' 'the Library model.', required=False, type=str), @@ -33,13 +42,13 @@ def get_queryset(self): @action(detail=False, methods=['get'], url_path='full') def get_full_model_set(self, request): query_params = self.request.query_params.copy() - qs = Subject.objects.prefetch_related("specimen_set__library_set").all().order_by("-id") + qs = Subject.objects.prefetch_related("specimen_set__library_set").all().order_by("-subject_id") - # Allow filtering by library_internal_id - library_internal_id = query_params.get("library_internal_id", None) - if library_internal_id: - query_params.pop("library_internal_id") - qs = qs.filter(specimen__library__internal_id=library_internal_id) + # Allow filtering by libraryId + library_id = query_params.get("libraryId", None) + if library_id: + 
query_params.pop("libraryId") + qs = qs.filter(specimen__library__library_id=library_id) # Following same pattern with other filter where if unknown query params returns empty qs qs = Subject.objects.get_model_fields_query(qs, **query_params) @@ -49,10 +58,10 @@ def get_full_model_set(self, request): return self.get_paginated_response(serializer.data) - @extend_schema(responses={200: SubjectFullSerializer(many=True)}) + @extend_schema(operation_id='/api/v1/subject/id/full/', responses={200: SubjectFullSerializer(many=True)}) @action(detail=True, methods=['get'], url_path='full') def get_full_model_detail(self, request, pk=None): - subject = Subject.objects.get(id=pk) + subject = Subject.objects.get(orcabus_id=pk) serializer = SubjectFullSerializer(subject) return Response(serializer.data) @@ -63,8 +72,15 @@ class SpecimenViewSet(ReadOnlyModelViewSet): pagination_class = StandardResultsSetPagination filter_backends = [filters.OrderingFilter, filters.SearchFilter] ordering_fields = "__all__" - ordering = ["-id"] - search_fields = Subject.get_base_fields() + ordering = ["-specimen_id"] + search_fields = Specimen.get_base_fields() + queryset = Specimen.objects.none() + + @extend_schema(parameters=[ + SpecimenSerializer + ]) + def list(self, request, *args, **kwargs): + return super().list(request, *args, **kwargs) def get_queryset(self): return Specimen.objects.get_by_keyword(**self.request.query_params) @@ -75,8 +91,15 @@ class LibraryViewSet(ReadOnlyModelViewSet): pagination_class = StandardResultsSetPagination filter_backends = [filters.OrderingFilter, filters.SearchFilter] ordering_fields = "__all__" - ordering = ["-id"] - search_fields = Subject.get_base_fields() + ordering = ["-library_id"] + search_fields = Library.get_base_fields() + queryset = Library.objects.none() + + @extend_schema(parameters=[ + LibrarySerializer + ]) + def list(self, request, *args, **kwargs): + return super().list(request, *args, **kwargs) def get_queryset(self): return
Library.objects.get_by_keyword(**self.request.query_params) @@ -84,7 +107,7 @@ def get_queryset(self): @extend_schema(responses={200: LibraryFullSerializer(many=True)}) @action(detail=False, methods=['get'], url_path='full') def get_full_model_set(self, request): - qs = Library.objects.select_related("specimen__subject").all().order_by("-id") + qs = Library.objects.select_related("specimen__subject").all().order_by("-library_id") # Allow filtering by the keys inside the library model qs = Library.objects.get_model_fields_query(qs, **self.request.query_params) @@ -97,7 +120,7 @@ def get_full_model_set(self, request): @extend_schema(responses={200: LibraryFullSerializer(many=False)}) @action(detail=True, methods=['get'], url_path='full') def get_full_model_detail(self, request, pk=None): - lib = Library.objects.get(id=pk) + lib = Library.objects.get(orcabus_id=pk) serializer = LibraryFullSerializer(lib) return Response(serializer.data) diff --git a/lib/workload/stateless/stacks/metadata-manager/deps/requirements-slim.txt b/lib/workload/stateless/stacks/metadata-manager/deps/requirements-slim.txt index 5e4b2f00a..a64ceb10d 100644 --- a/lib/workload/stateless/stacks/metadata-manager/deps/requirements-slim.txt +++ b/lib/workload/stateless/stacks/metadata-manager/deps/requirements-slim.txt @@ -20,3 +20,4 @@ serverless-wsgi==3.0.4 # six and regex required by automatically generated EventBridge code binding six==1.16.0 regex==2024.7.24 +ulid-py==1.1.0 diff --git a/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg b/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg index 7e3f0c01f..eae1bcba8 100644 --- a/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg +++ b/lib/workload/stateless/stacks/metadata-manager/docs/schema.drawio.svg @@ -1,4 +1,4 @@ -LibraryPKidinternal_idphenotypeworkflow qualitytypeassaycoverageSubjectPKidinternal_idSpecimenPKidinternal_idsource \ No newline at end of file 
+LibraryPKorcabus_idlibrary_idphenotypeworkflow qualitytypeassaycoverageproject_nameproject_ownerSubjectPKorcabus_idsubject_idSpecimenPKorcabus_idspecimen_idsource \ No newline at end of file diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py index 23f100efd..c7bb8745c 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/service/tracking_sheet_srv.py @@ -48,15 +48,15 @@ def persist_lab_metadata(df: pd.DataFrame): rows_invalid = list() # If the df do not contain to what has existed in the db, it will be deleted - for lib in Library.objects.exclude(internal_id__in=df['library_id'].tolist()).iterator(): + for lib in Library.objects.exclude(library_id__in=df['library_id'].tolist()).iterator(): library_deleted.append(lib) lib.delete() - for spc in Specimen.objects.exclude(internal_id__in=df['sample_id'].tolist()).iterator(): + for spc in Specimen.objects.exclude(specimen_id__in=df['sample_id'].tolist()).iterator(): specimen_deleted.append(spc) spc.delete() - for sbj in Subject.objects.exclude(internal_id__in=df['subject_id'].tolist()).iterator(): + for sbj in Subject.objects.exclude(subject_id__in=df['subject_id'].tolist()).iterator(): subject_deleted.append(sbj) sbj.delete() @@ -78,9 +78,9 @@ def persist_lab_metadata(df: pd.DataFrame): # subject_id_list = record.get("subject_id_list") # # try: - # spc = Specimen.objects.get(internal_id=specimen_id) + # spc = Specimen.objects.get(specimen_id=specimen_id) # for sbj in spc.subjects.all().iterator(): - # if sbj.internal_id not in subject_id_list: + # if sbj.subject_id not in subject_id_list: # spc.subjects.remove(sbj) # # except ObjectDoesNotExist: @@ -90,7 +90,7 @@ def persist_lab_metadata(df: pd.DataFrame): # # # specimen <-> subject (addition only) # try: - # specimen.subjects.get(id=subject.id) 
+ # specimen.subjects.get(orcabus_id=subject.orcabus_id) # except ObjectDoesNotExist: # specimen.subjects.add(subject) @@ -99,9 +99,9 @@ def persist_lab_metadata(df: pd.DataFrame): try: # 1. update or create all data in the model from the given record subject, is_sub_created = Subject.objects.update_or_create( - internal_id=record.get('subject_id'), + subject_id=record.get('subject_id'), defaults={ - "internal_id": record.get('subject_id') + "subject_id": record.get('subject_id') } ) if is_sub_created: @@ -110,29 +110,30 @@ def persist_lab_metadata(df: pd.DataFrame): subject_updated.append(subject) specimen, is_spc_created = Specimen.objects.update_or_create( - internal_id=record.get('sample_id'), + specimen_id=record.get('sample_id'), defaults={ - "internal_id": record.get('sample_id'), + "specimen_id": record.get('sample_id'), "source": get_value_from_human_readable_label(Source.choices, record.get('source')), - 'subject_id': subject.id + 'subject_id': subject.orcabus_id } ) if is_spc_created: specimen_created.append(specimen) else: specimen_updated.append(specimen) - library, is_lib_created = Library.objects.update_or_create( - internal_id=record.get('library_id'), + library_id=record.get('library_id'), defaults={ - 'internal_id': record.get('library_id'), + 'library_id': record.get('library_id'), 'phenotype': get_value_from_human_readable_label(Phenotype.choices, record.get('phenotype')), 'workflow': get_value_from_human_readable_label(WorkflowType.choices, record.get('workflow')), 'quality': get_value_from_human_readable_label(Quality.choices, record.get('quality')), 'type': get_value_from_human_readable_label(LibraryType.choices, record.get('type')), 'assay': record.get('assay'), 'coverage': sanitize_library_coverage(record.get('coverage')), - 'specimen_id': specimen.id + 'specimen_id': specimen.orcabus_id, + 'project_owner': record.get('project_owner'), + 'project_name': record.get('project_name'), } ) if is_lib_created: @@ -143,17 +144,17 @@ def 
persist_lab_metadata(df: pd.DataFrame): # 2. linking or updating model to each other based on the record (update if it does not match) # library <-> specimen - if library.specimen is None or library.specimen.id != specimen.id: + if library.specimen is None or library.specimen.orcabus_id != specimen.orcabus_id: library.specimen = specimen library.save() # specimen <-> subject - if specimen.subject is None or specimen.subject.id != subject.id: + if specimen.subject is None or specimen.subject.orcabus_id != subject.orcabus_id: specimen.subject = subject specimen.save() except Exception as e: - if any(record.values()): # silent off iff blank row + if any(record.values()): # silent off blank row logger.warning(f"Invalid record: {libjson.dumps(record)} Exception: {e}") rows_invalid.append(record) continue diff --git a/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py b/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py index e4a7f4d46..fb70cb430 100644 --- a/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py +++ b/lib/workload/stateless/stacks/metadata-manager/proc/tests/test_tracking_sheet_srv.py @@ -1,7 +1,6 @@ import pandas as pd from django.test import TestCase -from libumccr import libjson from app.models import Library, Specimen, Subject from proc.service.tracking_sheet_srv import sanitize_lab_metadata_df, persist_lab_metadata @@ -120,30 +119,33 @@ def test_persist_lab_metadata(self): self.assertEqual(result.get("subject").get("new_count"), 1, "1 new subject should be created") self.assertEqual(result.get("subject").get("update_count"), 2, "2 update in subject") - lib_1 = Library.objects.get(internal_id=RECORD_1.get("LibraryID")) - self.assertEqual(lib_1.type, RECORD_1.get("Type"), "incorrect value stored") - self.assertEqual(lib_1.phenotype, RECORD_1.get("Phenotype"), "incorrect value stored") - self.assertEqual(lib_1.workflow, RECORD_1.get("Workflow"), 
"incorrect value stored") - self.assertEqual(lib_1.specimen.internal_id, RECORD_1.get("SampleID"), "incorrect specimen linked") + lib_1 = Library.objects.get(library_id=RECORD_1.get("LibraryID")) + self.assertEqual(lib_1.type, RECORD_1.get("Type"), "incorrect value (Type) stored") + self.assertEqual(lib_1.phenotype, RECORD_1.get("Phenotype"), "incorrect value (Phenotype) stored") + self.assertEqual(lib_1.assay, RECORD_1.get("Assay"), "incorrect value (Assay) stored") + self.assertEqual(lib_1.workflow, RECORD_1.get("Workflow"), "incorrect value (Workflow) stored") + self.assertEqual(lib_1.project_owner, RECORD_1.get("ProjectOwner"), "incorrect value (ProjectOwner) stored") + self.assertEqual(lib_1.project_name, RECORD_1.get("ProjectName"),"incorrect value (ProjectName) stored") + self.assertEqual(lib_1.specimen.specimen_id, RECORD_1.get("SampleID"), "incorrect specimen linked") - spc_1 = Specimen.objects.get(internal_id=RECORD_1.get("SampleID")) + spc_1 = Specimen.objects.get(specimen_id=RECORD_1.get("SampleID")) self.assertIsNotNone(spc_1) self.assertEqual(spc_1.source, RECORD_1.get("Source"), "incorrect value stored") - sbj_1 = Subject.objects.get(internal_id=RECORD_1.get("SubjectID")) + sbj_1 = Subject.objects.get(subject_id=RECORD_1.get("SubjectID")) self.assertIsNotNone(sbj_1) # check relationships if lib_1 and lib_2 is in the same spc_1 spc_lib_qs = spc_1.library_set.all() - self.assertEqual(spc_lib_qs.filter(internal_id=RECORD_1.get("LibraryID")).count(), 1, + self.assertEqual(spc_lib_qs.filter(library_id=RECORD_1.get("LibraryID")).count(), 1, "lib_1 and spc_1 is not linked") - self.assertEqual(spc_lib_qs.filter(internal_id=RECORD_2.get("LibraryID")).count(), 1, + self.assertEqual(spc_lib_qs.filter(library_id=RECORD_2.get("LibraryID")).count(), 1, "lib_2 and spc_1 is not linked") # check if all lib is the same with sbj_1 for rec in mock_sheet_data: - lib = Library.objects.get(internal_id=rec.get("LibraryID")) - 
self.assertEqual(lib.specimen.subject.internal_id, RECORD_1.get("SubjectID"), + lib = Library.objects.get(library_id=rec.get("LibraryID")) + self.assertEqual(lib.specimen.subject.subject_id, RECORD_1.get("SubjectID"), "library is not linked to the same subject") def test_persist_lab_metadata_alter_sbj(self): @@ -162,18 +164,18 @@ def test_persist_lab_metadata_alter_sbj(self): metadata_pd = sanitize_lab_metadata_df(metadata_pd) persist_lab_metadata(metadata_pd) - sbj_4 = Subject.objects.get(internal_id=RECORD_3_DIFF_SBJ['SubjectID']) + sbj_4 = Subject.objects.get(subject_id=RECORD_3_DIFF_SBJ['SubjectID']) self.assertIsNotNone(sbj_4) - spc_4 = sbj_4.specimen_set.get(internal_id=RECORD_3_DIFF_SBJ['SampleID']) - self.assertEqual(spc_4.internal_id, RECORD_3_DIFF_SBJ['SampleID'], + spc_4 = sbj_4.specimen_set.get(specimen_id=RECORD_3_DIFF_SBJ['SampleID']) + self.assertEqual(spc_4.specimen_id, RECORD_3_DIFF_SBJ['SampleID'], 'specimen obj should not change on link update') metadata_pd = pd.json_normalize([RECORD_3_DIFF_SPC]) metadata_pd = sanitize_lab_metadata_df(metadata_pd) persist_lab_metadata(metadata_pd) - lib_3 = Library.objects.get(internal_id=RECORD_3['LibraryID']) - self.assertEqual(lib_3.specimen.internal_id, RECORD_3_DIFF_SPC['SampleID'], + lib_3 = Library.objects.get(library_id=RECORD_3['LibraryID']) + self.assertEqual(lib_3.specimen.specimen_id, RECORD_3_DIFF_SPC['SampleID'], 'incorrect link between lib and spc when changing links') def test_with_deleted_model(self) -> None: @@ -192,7 +194,7 @@ def test_with_deleted_model(self) -> None: metadata_pd = sanitize_lab_metadata_df(metadata_pd) result = persist_lab_metadata(metadata_pd) - deleted_lib = Library.objects.filter(internal_id__in=[RECORD_1.get('LibraryID'), RECORD_2.get('LibraryID')]) + deleted_lib = Library.objects.filter(library_id__in=[RECORD_1.get('LibraryID'), RECORD_2.get('LibraryID')]) self.assertEqual(deleted_lib.count(), 0, 'these library query should all be deleted') 
self.assertEqual(result.get("library").get("delete_count"), 2, "2 library should be deleted") @@ -210,6 +212,6 @@ def test_save_choice_from_human_readable_label(self) -> None: metadata_pd = sanitize_lab_metadata_df(metadata_pd) persist_lab_metadata(metadata_pd) - spc = Specimen.objects.get(internal_id=mock_record.get("SampleID")) + spc = Specimen.objects.get(specimen_id=mock_record.get("SampleID")) self.assertIsNotNone(spc) self.assertEqual(spc.source, 'water', "incorrect value stored") From 7e50552dc0a73a378704c70566a01fd9b6fd7d13 Mon Sep 17 00:00:00 2001 From: Victor San Kho Lin Date: Wed, 21 Aug 2024 21:46:55 +1000 Subject: [PATCH 13/20] Fixed metadata endpoint allow dot sep in PK * This allows dot separator character primary key in API lookup. Such as 'BASE_URL/api/v1/library/lib.01J5TAAXYMXRRMA1VQNGW905AD' --- .../stateless/stacks/metadata-manager/app/viewsets/lab.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py index f9dfd2ffa..089b9f630 100644 --- a/lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py +++ b/lib/workload/stateless/stacks/metadata-manager/app/viewsets/lab.py @@ -11,6 +11,7 @@ class SubjectViewSet(ReadOnlyModelViewSet): + lookup_value_regex = "[^/]+" serializer_class = SubjectSerializer pagination_class = StandardResultsSetPagination filter_backends = [filters.OrderingFilter, filters.SearchFilter] @@ -68,6 +69,7 @@ def get_full_model_detail(self, request, pk=None): class SpecimenViewSet(ReadOnlyModelViewSet): + lookup_value_regex = "[^/]+" serializer_class = SpecimenSerializer pagination_class = StandardResultsSetPagination filter_backends = [filters.OrderingFilter, filters.SearchFilter] @@ -87,6 +89,7 @@ def get_queryset(self): class LibraryViewSet(ReadOnlyModelViewSet): + lookup_value_regex = "[^/]+" serializer_class = LibrarySerializer pagination_class = StandardResultsSetPagination 
filter_backends = [filters.OrderingFilter, filters.SearchFilter] From b6dcd4efb76d917d37664cc94be4296d225f1a80 Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Thu, 22 Aug 2024 14:54:01 +1000 Subject: [PATCH 14/20] Add state sub-endpoint to workflowrun API --- .../commands/generate_mock_workflow_run.py | 20 ++- .../migrations/0001_initial.py | 121 ++++++++++-------- ...ibraryassociation_workflowrun_libraries.py | 42 ------ .../workflow_manager/models/__init__.py | 2 +- .../workflow_manager/models/library.py | 1 - .../workflow_manager/models/state.py | 3 +- .../workflow_manager/models/workflow_run.py | 9 +- .../workflow_manager/serializers.py | 12 +- .../workflow_manager/tests/factories.py | 18 ++- .../workflow_manager/urls/base.py | 9 +- .../workflow_manager/viewsets/state.py | 22 ++++ .../services/create_workflow_run.py | 9 +- .../tests/test_workflow_srv.py | 21 ++- 13 files changed, 153 insertions(+), 136 deletions(-) delete mode 100644 lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py create mode 100644 lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/state.py diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py index ee8dcf680..e7ff462f0 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py @@ -5,8 +5,9 @@ import json from datetime import datetime from libumccr import libjson -from workflow_manager.models import Workflow, WorkflowRun, LibraryAssociation -from workflow_manager.tests.factories import WorkflowRunFactory, WorkflowFactory, PayloadFactory, LibraryFactory +from 
workflow_manager.models import Workflow, WorkflowRun, LibraryAssociation, State +from workflow_manager.tests.factories import WorkflowRunFactory, WorkflowFactory, PayloadFactory, LibraryFactory, \ + StateFactory WORKFLOW_NAME = "TestWorkflow" @@ -16,7 +17,6 @@ class Command(BaseCommand): help = "Generate mock Workflow data into database for local development and testing" def handle(self, *args, **options): - wf_payload = PayloadFactory() qs: QuerySet = Workflow.objects.filter(workflow_name=WORKFLOW_NAME) if qs.exists(): @@ -27,10 +27,15 @@ def handle(self, *args, **options): wfr: WorkflowRun = WorkflowRunFactory( workflow_run_name="MockWorkflowRun", portal_run_id="1234", - payload=wf_payload, workflow=wf ) + wf_payload = PayloadFactory() + StateFactory( + workflow_run=wfr, + payload=wf_payload + ) + library = LibraryFactory() LibraryAssociation.objects.create( workflow_run=wfr, @@ -42,9 +47,14 @@ def handle(self, *args, **options): wfr2: WorkflowRun = WorkflowRunFactory( workflow_run_name="MockWorkflowRun2", portal_run_id="1235", - payload=wf_payload, workflow=wf ) + StateFactory( + workflow_run=wfr2, + status="RUNNING", + payload=wf_payload + ) + library2 = LibraryFactory(orcabus_id="lib.01J5M2JFE1JPYV62RYQEG99CP5", library_id="L000002") LibraryAssociation.objects.create( workflow_run=wfr2, diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0001_initial.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0001_initial.py index 67baab19e..484ed0407 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0001_initial.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 5.0.6 on 2024-05-17 20:10 +# Generated by Django 5.1 on 2024-08-21 05:48 import django.core.serializers.json import django.db.models.deletion @@ -9,77 +9,90 @@ class Migration(migrations.Migration): initial = True - 
dependencies = [] + dependencies = [ + ] operations = [ migrations.CreateModel( - name="Payload", + name='Library', + fields=[ + ('orcabus_id', models.CharField(max_length=255, primary_key=True, serialize=False)), + ('library_id', models.CharField(max_length=255)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='Payload', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('payload_ref_id', models.CharField(max_length=255, unique=True)), + ('version', models.CharField(max_length=255)), + ('data', models.JSONField(encoder=django.core.serializers.json.DjangoJSONEncoder)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='LibraryAssociation', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('association_date', models.DateTimeField()), + ('status', models.CharField(max_length=255)), + ('library', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='workflow_manager.library')), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='Workflow', fields=[ - ("id", models.BigAutoField(primary_key=True, serialize=False)), - ("payload_ref_id", models.CharField(max_length=255, unique=True)), - ("version", models.CharField(max_length=255)), - ( - "data", - models.JSONField( - encoder=django.core.serializers.json.DjangoJSONEncoder - ), - ), + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('workflow_name', models.CharField(max_length=255)), + ('workflow_version', models.CharField(max_length=255)), + ('execution_engine', models.CharField(max_length=255)), + ('execution_engine_pipeline_id', models.CharField(max_length=255)), + ('approval_state', models.CharField(max_length=255)), ], options={ - "abstract": False, + 'unique_together': {('workflow_name', 'workflow_version')}, }, ), migrations.CreateModel( - name="Workflow", + name='WorkflowRun', fields=[ - ("id", 
models.BigAutoField(primary_key=True, serialize=False)), - ("workflow_name", models.CharField(max_length=255)), - ("workflow_version", models.CharField(max_length=255)), - ("execution_engine", models.CharField(max_length=255)), - ("execution_engine_pipeline_id", models.CharField(max_length=255)), - ("approval_state", models.CharField(max_length=255)), + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('portal_run_id', models.CharField(max_length=255, unique=True)), + ('execution_id', models.CharField(blank=True, max_length=255, null=True)), + ('workflow_run_name', models.CharField(blank=True, max_length=255, null=True)), + ('comment', models.CharField(blank=True, max_length=255, null=True)), + ('libraries', models.ManyToManyField(through='workflow_manager.LibraryAssociation', to='workflow_manager.library')), + ('workflow', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='workflow_manager.workflow')), ], options={ - "unique_together": {("workflow_name", "workflow_version")}, + 'abstract': False, }, ), + migrations.AddField( + model_name='libraryassociation', + name='workflow_run', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='workflow_manager.workflowrun'), + ), migrations.CreateModel( - name="WorkflowRun", + name='State', fields=[ - ("id", models.BigAutoField(primary_key=True, serialize=False)), - ("portal_run_id", models.CharField(max_length=255)), - ("status", models.CharField(max_length=255)), - ("timestamp", models.DateTimeField()), - ( - "execution_id", - models.CharField(blank=True, max_length=255, null=True), - ), - ( - "workflow_run_name", - models.CharField(blank=True, max_length=255, null=True), - ), - ("comment", models.CharField(blank=True, max_length=255, null=True)), - ( - "payload", - models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - to="workflow_manager.payload", - ), - ), - ( - "workflow", - models.ForeignKey( - 
blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - to="workflow_manager.workflow", - ), - ), + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('status', models.CharField(max_length=255)), + ('timestamp', models.DateTimeField()), + ('comment', models.CharField(blank=True, max_length=255, null=True)), + ('payload', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='workflow_manager.payload')), + ('workflow_run', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='workflow_manager.workflowrun')), ], options={ - "unique_together": {("portal_run_id", "status", "timestamp")}, + 'unique_together': {('workflow_run', 'status', 'timestamp')}, }, ), ] diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py deleted file mode 100644 index 1d7e891f3..000000000 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/migrations/0002_library_libraryassociation_workflowrun_libraries.py +++ /dev/null @@ -1,42 +0,0 @@ -# Generated by Django 5.1 on 2024-08-19 01:52 - -import django.db.models.deletion -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('workflow_manager', '0001_initial'), - ] - - operations = [ - migrations.CreateModel( - name='Library', - fields=[ - ('orcabus_id', models.CharField(max_length=255, primary_key=True, serialize=False)), - ('library_id', models.CharField(max_length=255)), - ], - options={ - 'abstract': False, - }, - ), - migrations.CreateModel( - name='LibraryAssociation', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('association_date', models.DateTimeField()), - ('status', 
models.CharField(max_length=255)), - ('library', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='workflow_manager.library')), - ('workflow_run', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='workflow_manager.workflowrun')), - ], - options={ - 'abstract': False, - }, - ), - migrations.AddField( - model_name='workflowrun', - name='libraries', - field=models.ManyToManyField(through='workflow_manager.LibraryAssociation', to='workflow_manager.library'), - ), - ] diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py index 81f4f3d24..c90b18c43 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/__init__.py @@ -4,4 +4,4 @@ from .payload import Payload from .workflow_run import WorkflowRun, LibraryAssociation from .library import Library -from .state import State \ No newline at end of file +from .state import State diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py index fc57ac4dc..149947390 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/library.py @@ -2,7 +2,6 @@ from django.db import models from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager -from workflow_manager.models.workflow import Workflow class LibraryManager(OrcaBusBaseManager): diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/state.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/state.py index 76f927ae1..22dc8b016 100644 --- 
a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/state.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/state.py @@ -1,7 +1,8 @@ from django.db import models from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager -from workflow_manager.models import WorkflowRun, Payload +from workflow_manager.models.workflow_run import WorkflowRun +from workflow_manager.models.payload import Payload class StateManager(OrcaBusBaseManager): diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py index 26fc64a59..9c6fa6b00 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/models/workflow_run.py @@ -1,7 +1,8 @@ from django.db import models from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager -from workflow_manager.models import Workflow, Library, State +from workflow_manager.models.library import Library +from workflow_manager.models.workflow import Workflow class WorkflowRunManager(OrcaBusBaseManager): @@ -15,9 +16,6 @@ class WorkflowRun(OrcaBusBaseModel): # --- mandatory fields portal_run_id = models.CharField(max_length=255, unique=True) - current_status = models.CharField(max_length=255) - created = models.DateTimeField() - last_modified = models.DateTimeField() # --- optional fields @@ -43,9 +41,6 @@ def to_dict(self): return { "id": self.id, "portal_run_id": self.portal_run_id, - "current_status": self.current_status, - "created": str(self.created), - "last_modified": str(self.last_modified), "execution_id": self.execution_id, "workflow_run_name": self.workflow_run_name, "comment": self.comment, diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py 
b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py index aaa783fde..da6ba63f0 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/serializers.py @@ -1,9 +1,6 @@ -from typing import Dict, List - from rest_framework import serializers -from rest_framework.fields import empty -from workflow_manager.models import Workflow, WorkflowRun, Payload, Library +from workflow_manager.models import Workflow, WorkflowRun, Payload, Library, State READ_ONLY_SERIALIZER = "READ ONLY SERIALIZER" @@ -53,7 +50,14 @@ class Meta: model = Payload fields = '__all__' + class LibraryModelSerializer(serializers.ModelSerializer): class Meta: model = Library fields = '__all__' + + +class StateModelSerializer(serializers.ModelSerializer): + class Meta: + model = State + fields = '__all__' diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py index a530cddaf..dce8c2842 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/tests/factories.py @@ -1,12 +1,12 @@ from enum import Enum -import uuid, json +import uuid from datetime import datetime from zoneinfo import ZoneInfo import factory from django.utils.timezone import make_aware -from workflow_manager.models import Workflow, WorkflowRun, Payload, Library +from workflow_manager.models import Workflow, WorkflowRun, Payload, Library, State class TestConstant(Enum): @@ -51,11 +51,8 @@ class Meta: portal_run_id = f"20240130{_uid[:8]}" execution_id = _uid workflow_run_name = f"TestWorkflowRun{_uid[:8]}" - status = "READY" comment = "Lorem Ipsum" - timestamp = make_aware(datetime.now()) # If required, set later - payload = None workflow = None @@ -65,3 +62,14 @@ class Meta: 
library_id = TestConstant.library.value["library_id"] orcabus_id = TestConstant.library.value["orcabus_id"] + + +class StateFactory(factory.django.DjangoModelFactory): + class Meta: + model = State + + status = "READY" + timestamp = make_aware(datetime.now()) + comment = "Comment" + payload = None + workflow_run = factory.SubFactory(WorkflowRunFactory) diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py index ce9d7ca47..7232769bc 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py @@ -4,7 +4,7 @@ from workflow_manager.viewsets.workflow import WorkflowViewSet from workflow_manager.viewsets.workflow_run import WorkflowRunViewSet from workflow_manager.viewsets.payload import PayloadViewSet -from workflow_manager.viewsets.library import LibraryViewSet +from workflow_manager.viewsets.state import StateViewSet from workflow_manager.settings.base import API_VERSION api_namespace = "api" @@ -15,7 +15,12 @@ router.register(r"workflow", WorkflowViewSet, basename="workflow") router.register(r"workflowrun", WorkflowRunViewSet, basename="workflowrun") router.register(r"payload", PayloadViewSet, basename="payload") -# router.register(r"library", LibraryViewSet, basename="library") + +router.register( + "workflowrun/(?P[^/.]+)/state", + StateViewSet, + basename="workflowrun-state", +) urlpatterns = [ path(f"{api_base}", include(router.urls)), diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/state.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/state.py new file mode 100644 index 000000000..660209330 --- /dev/null +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/state.py @@ -0,0 +1,22 @@ +from rest_framework import filters +from rest_framework.viewsets 
import ReadOnlyModelViewSet + +from workflow_manager.models import State +from workflow_manager.pagination import StandardResultsSetPagination +from workflow_manager.serializers import StateModelSerializer + + +class StateViewSet(ReadOnlyModelViewSet): + serializer_class = StateModelSerializer + pagination_class = StandardResultsSetPagination + filter_backends = [filters.OrderingFilter, filters.SearchFilter] + ordering_fields = '__all__' + ordering = ['-id'] + search_fields = State.get_base_fields() + + def get_queryset(self): + qs = State.objects.filter(workflow_run=self.kwargs["workflowrun_id"]) + qs = State.objects.get_model_fields_query(qs, **self.request.query_params) + return qs + + diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py index 5310f7472..267da069a 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py @@ -16,8 +16,6 @@ from workflow_manager.models import ( WorkflowRun, Workflow, - State, - Payload, Library, LibraryAssociation, ) @@ -58,8 +56,6 @@ def handler(event, context): # then create the actual workflow run entry if it does not exist try: wfr: WorkflowRun = WorkflowRun.objects.get(portal_run_id=wrsc.portalRunId) - wfr.current_status = wrsc.status - wfr.last_modified = wrsc.timestamp except Exception: print("No workflow found! 
Creating new entry.") wfr = WorkflowRun( @@ -67,10 +63,7 @@ def handler(event, context): portal_run_id=wrsc.portalRunId, execution_id=wrsc.executionId, # the execution service WRSC does carry the execution ID workflow_run_name=wrsc.workflowRunName, - current_status=wrsc.status, - comment=None, - last_modified=wrsc.timestamp, - created=wrsc.timestamp + comment=None ) print("Persisting Workflow record.") wfr.save() diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py index 829c1b0fb..a810e8bef 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/tests/test_workflow_srv.py @@ -1,7 +1,10 @@ from unittest import skip + +from django.db.models import QuerySet + from workflow_manager_proc.services import create_workflow_run from workflow_manager_proc.tests.case import WorkflowManagerProcUnitTestCase, logger -from workflow_manager.models.workflow import Workflow +from workflow_manager.models import WorkflowRun class WorkflowSrvUnitTests(WorkflowManagerProcUnitTestCase): @@ -9,7 +12,7 @@ class WorkflowSrvUnitTests(WorkflowManagerProcUnitTestCase): # @skip def test_get_workflow_from_db(self): """ - python manage.py test workflow_manager_proc.tests.test_workflow_srv.WorkflowSrvUnitTests.test_get_workflow_from_db + # python manage.py test workflow_manager_proc.tests.test_workflow_srv.WorkflowSrvUnitTests.test_get_workflow_from_db """ test_event = { @@ -36,10 +39,16 @@ def test_get_workflow_from_db(self): } } - test_wfl = create_workflow_run.handler(test_event, None) - logger.info(test_wfl) - self.assertIsNotNone(test_wfl) - self.assertEqual("ctTSO500-L000002", test_wfl.workflow_run_name) + test_wfr = create_workflow_run.handler(test_event, None) + logger.info(test_wfr) + 
self.assertIsNotNone(test_wfr) + self.assertEqual("ctTSO500-L000002", test_wfr.workflow_run_name) + + logger.info("Retrieve persisted DB records") + wfr_qs: QuerySet = WorkflowRun.objects.all() + self.assertEqual(1, wfr_qs.count()) + db_wfr: WorkflowRun = wfr_qs.first() + self.assertEqual("ctTSO500-L000002", db_wfr.workflow_run_name) def test_get_workflow_from_db2(self): """ From 5d90020c00387be7447923e78734dbe3523f6d78 Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Thu, 22 Aug 2024 15:28:36 +1000 Subject: [PATCH 15/20] Add nested library endpoint to workflowrun API --- .../stacks/workflow-manager/workflow_manager/urls/base.py | 7 +++++++ .../workflow-manager/workflow_manager/viewsets/library.py | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py index 7232769bc..207d2bbce 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/urls/base.py @@ -5,6 +5,7 @@ from workflow_manager.viewsets.workflow_run import WorkflowRunViewSet from workflow_manager.viewsets.payload import PayloadViewSet from workflow_manager.viewsets.state import StateViewSet +from workflow_manager.viewsets.library import LibraryViewSet from workflow_manager.settings.base import API_VERSION api_namespace = "api" @@ -22,6 +23,12 @@ basename="workflowrun-state", ) +router.register( + "workflowrun/(?P[^/.]+)/library", + LibraryViewSet, + basename="workflowrun-library", +) + urlpatterns = [ path(f"{api_base}", include(router.urls)), ] diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py index bf2d5f150..36d1349e8 100644 --- 
a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/viewsets/library.py @@ -15,4 +15,6 @@ class LibraryViewSet(ReadOnlyModelViewSet): search_fields = Library.get_base_fields() def get_queryset(self): - return Library.objects.get_by_keyword(**self.request.query_params) + qs = Library.objects.filter(workflowrun=self.kwargs["workflowrun_id"]) + qs = Library.objects.get_model_fields_query(qs, **self.request.query_params) + return qs From 1ccd835080a01a7d49e5fae0ffc6458b52c61727 Mon Sep 17 00:00:00 2001 From: Florian Reisinger Date: Thu, 22 Aug 2024 19:18:18 +1000 Subject: [PATCH 16/20] Update mock data --- .../stacks/workflow-manager/Makefile | 5 + .../commands/generate_mock_workflow_run.py | 152 ++++++++++++++---- 2 files changed, 128 insertions(+), 29 deletions(-) diff --git a/lib/workload/stateless/stacks/workflow-manager/Makefile b/lib/workload/stateless/stacks/workflow-manager/Makefile index a910d004e..ee07e2122 100644 --- a/lib/workload/stateless/stacks/workflow-manager/Makefile +++ b/lib/workload/stateless/stacks/workflow-manager/Makefile @@ -30,6 +30,11 @@ migrate: start: migrate @python manage.py runserver_plus 0.0.0.0:8000 +mock: + @python manage.py generate_mock_workflow_run + +run-mock: reset-db migrate mock start + openapi: @python manage.py generateschema > orcabus.hlo.openapi.yaml diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py index e7ff462f0..20bcffe2a 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager/management/commands/generate_mock_workflow_run.py @@ -11,57 +11,151 @@ WORKFLOW_NAME = "TestWorkflow" +STATUS_START 
= "READY" +STATUS_RUNNING = "RUNNING" +STATUS_END = "SUCCEEDED" +STATUS_FAIL = "FAILED" # https://docs.djangoproject.com/en/5.0/howto/custom-management-commands/ class Command(BaseCommand): - help = "Generate mock Workflow data into database for local development and testing" + help = """ + Generate mock data and populate DB for local testing. + """ def handle(self, *args, **options): - qs: QuerySet = Workflow.objects.filter(workflow_name=WORKFLOW_NAME) - - if qs.exists(): + # don't do anything if there is already mock data + if Workflow.objects.filter(workflow_name__startswith=WORKFLOW_NAME).exists(): print("Mock data found, Skipping creation.") return - wf = WorkflowFactory(workflow_name=WORKFLOW_NAME) - wfr: WorkflowRun = WorkflowRunFactory( - workflow_run_name="MockWorkflowRun", + # Common components: payload and libraries + generic_payload = PayloadFactory() # Payload content is not important for now + libraries = [ + LibraryFactory(orcabus_id="lib.01J5M2JFE1JPYV62RYQEG99CP1", library_id="L000001"), + LibraryFactory(orcabus_id="lib.02J5M2JFE1JPYV62RYQEG99CP2", library_id="L000002"), + LibraryFactory(orcabus_id="lib.03J5M2JFE1JPYV62RYQEG99CP3", library_id="L000003"), + LibraryFactory(orcabus_id="lib.04J5M2JFE1JPYV62RYQEG99CP4", library_id="L000004") + ] + + # First case: a primary workflow with two executions linked to 4 libraries + # The first execution failed and led to a repetition that succeeded + self.create_primary(generic_payload, libraries) + self.create_secondary(generic_payload, libraries) + + print("Done") + + @staticmethod + def create_primary(generic_payload, libraries): + """ + Case: a primary workflow with two executions linked to 4 libraries + The first execution failed and led to a repetition that succeeded + """ + + wf = WorkflowFactory(workflow_name=WORKFLOW_NAME + "Primary") + + # The first execution (workflow run 1) + wfr_1: WorkflowRun = WorkflowRunFactory( + workflow_run_name=WORKFLOW_NAME + "PrimaryRun1", portal_run_id="1234", 
workflow=wf ) + for state in [STATUS_START, STATUS_RUNNING, STATUS_FAIL]: + StateFactory(workflow_run=wfr_1, status=state, payload=generic_payload) + for i in [0, 1, 2, 3]: + LibraryAssociation.objects.create( + workflow_run=wfr_1, + library=libraries[i], + association_date=make_aware(datetime.now()), + status="ACTIVE", + ) - wf_payload = PayloadFactory() - StateFactory( - workflow_run=wfr, - payload=wf_payload + # The second execution (workflow run 2) + wfr_2: WorkflowRun = WorkflowRunFactory( + workflow_run_name=WORKFLOW_NAME + "PrimaryRun2", + portal_run_id="1235", + workflow=wf ) + for state in [STATUS_START, STATUS_RUNNING, STATUS_END]: + StateFactory(workflow_run=wfr_2, status=state, payload=generic_payload) + for i in [0, 1, 2, 3]: + LibraryAssociation.objects.create( + workflow_run=wfr_2, + library=libraries[i], + association_date=make_aware(datetime.now()), + status="ACTIVE", + ) + + @staticmethod + def create_secondary(generic_payload, libraries): + """ + Case: a secondary pipeline comprising 3 workflows with corresponding executions + First workflow: QC (2 runs for 2 libraries) + Second workflow: Alignment (1 run for 2 libraries) + Third workflow: VariantCalling (1 run for 2 libraries) + """ - library = LibraryFactory() + wf_qc = WorkflowFactory(workflow_name=WORKFLOW_NAME + "QC") + + # QC of Library 1 + wfr_qc_1: WorkflowRun = WorkflowRunFactory( + workflow_run_name=WORKFLOW_NAME + "QCRunLib1", + portal_run_id="2345", + workflow=wf_qc + ) + for state in [STATUS_START, STATUS_RUNNING, STATUS_END]: + StateFactory(workflow_run=wfr_qc_1, status=state, payload=generic_payload) LibraryAssociation.objects.create( - workflow_run=wfr, - library=library, + workflow_run=wfr_qc_1, + library=libraries[0], association_date=make_aware(datetime.now()), status="ACTIVE", ) - wfr2: WorkflowRun = WorkflowRunFactory( - workflow_run_name="MockWorkflowRun2", - portal_run_id="1235", - workflow=wf - ) - StateFactory( - workflow_run=wfr2, - status="RUNNING", - payload=wf_payload 
+ # QC of Library 2 + wfr_qc_2: WorkflowRun = WorkflowRunFactory( + workflow_run_name=WORKFLOW_NAME + "QCRunLib2", + portal_run_id="2346", + workflow=wf_qc ) - - library2 = LibraryFactory(orcabus_id="lib.01J5M2JFE1JPYV62RYQEG99CP5", library_id="L000002") + for state in [STATUS_START, STATUS_RUNNING, STATUS_END]: + StateFactory(workflow_run=wfr_qc_2, status=state, payload=generic_payload) LibraryAssociation.objects.create( - workflow_run=wfr2, - library=library2, + workflow_run=wfr_qc_2, + library=libraries[1], association_date=make_aware(datetime.now()), status="ACTIVE", ) - print(libjson.dumps(wf.to_dict())) - print("Done") + # Alignment + wf_align = WorkflowFactory(workflow_name=WORKFLOW_NAME + "Alignment") + wfr_a: WorkflowRun = WorkflowRunFactory( + workflow_run_name=WORKFLOW_NAME + "AlignmentRun", + portal_run_id="3456", + workflow=wf_align + ) + for state in [STATUS_START, STATUS_RUNNING, STATUS_END]: + StateFactory(workflow_run=wfr_a, status=state, payload=generic_payload) + for i in [0, 1]: + LibraryAssociation.objects.create( + workflow_run=wfr_a, + library=libraries[i], + association_date=make_aware(datetime.now()), + status="ACTIVE", + ) + + # Variant Calling + wf_vc = WorkflowFactory(workflow_name=WORKFLOW_NAME + "VariantCalling") + wfr_vc: WorkflowRun = WorkflowRunFactory( + workflow_run_name=WORKFLOW_NAME + "VariantCallingRun", + portal_run_id="4567", + workflow=wf_vc + ) + for state in [STATUS_START, STATUS_RUNNING, STATUS_END]: + StateFactory(workflow_run=wfr_vc, status=state, payload=generic_payload) + for i in [0, 1]: + LibraryAssociation.objects.create( + workflow_run=wfr_vc, + library=libraries[i], + association_date=make_aware(datetime.now()), + status="ACTIVE", + ) From bad848a1a45711c0a1f52eba8a9d36e665c70b88 Mon Sep 17 00:00:00 2001 From: Victor San Kho Lin Date: Sat, 24 Aug 2024 18:19:44 +1000 Subject: [PATCH 17/20] Fixed WorkflowRun State creation business logic * WorkflowRun State creation is WRSC timestamp dependant. 
It needs to check "time window" condition before saving and emitting (relaying) WRSC event. --- .../workflow_manager_proc/services/create_workflow_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py index 267da069a..ca8ebe72d 100644 --- a/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py +++ b/lib/workload/stateless/stacks/workflow-manager/workflow_manager_proc/services/create_workflow_run.py @@ -69,7 +69,7 @@ def handler(event, context): wfr.save() # create the related state & payload entries for the WRSC - create_workflow_run_state(wrsc=wrsc, wfr=wfr) + # create_workflow_run_state(wrsc=wrsc, wfr=wfr) # FIXME State creation is "time window" WRSC timestamp dependant # if the workflow run is linked to library record(s), create the association(s) input_libraries: list[LibraryRecord] = wrsc.linkedLibraries From 17aae7f6e69fcb5d982e96178a12926d9819e5f0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 25 Aug 2024 23:48:09 +0000 Subject: [PATCH 18/20] build(deps): bump micromatch from 4.0.7 to 4.0.8 Bumps [micromatch](https://github.com/micromatch/micromatch) from 4.0.7 to 4.0.8. - [Release notes](https://github.com/micromatch/micromatch/releases) - [Changelog](https://github.com/micromatch/micromatch/blob/4.0.8/CHANGELOG.md) - [Commits](https://github.com/micromatch/micromatch/compare/4.0.7...4.0.8) --- updated-dependencies: - dependency-name: micromatch dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yarn.lock b/yarn.lock index 7566f84af..491b46630 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3752,12 +3752,12 @@ __metadata: linkType: hard "micromatch@npm:^4.0.4": - version: 4.0.7 - resolution: "micromatch@npm:4.0.7" + version: 4.0.8 + resolution: "micromatch@npm:4.0.8" dependencies: braces: "npm:^3.0.3" picomatch: "npm:^2.3.1" - checksum: 10/a11ed1cb67dcbbe9a5fc02c4062cf8bb0157d73bf86956003af8dcfdf9b287f9e15ec0f6d6925ff6b8b5b496202335e497b01de4d95ef6cf06411bc5e5c474a0 + checksum: 10/6bf2a01672e7965eb9941d1f02044fad2bd12486b5553dc1116ff24c09a8723157601dc992e74c911d896175918448762df3b3fd0a6b61037dd1a9766ddfbf58 languageName: node linkType: hard From a81e391c33b605ab053d850c83a3b7b83a4a76be Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Mon, 26 Aug 2024 11:33:51 +1000 Subject: [PATCH 19/20] fix(filemanager): increase migrate function timeout --- .../filemanager/deploy/constructs/functions/api.ts | 2 +- .../deploy/constructs/functions/function.ts | 14 +++++++++----- .../deploy/constructs/functions/ingest.ts | 2 +- .../deploy/constructs/functions/inventory.ts | 2 +- .../deploy/constructs/functions/migrate.ts | 9 +++++++-- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/api.ts b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/api.ts index 5182a8117..2466dd508 100644 --- a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/api.ts +++ b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/api.ts @@ -6,7 +6,7 @@ import { BucketProps } from './ingest'; /** * Props for the API function. */ -export type ApiFunctionProps = fn.FunctionPropsNoPackage & DatabaseProps & BucketProps; +export type ApiFunctionProps = fn.FunctionPropsConfigurable & DatabaseProps & BucketProps; /** * A construct for the Lambda API function. 
diff --git a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/function.ts b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/function.ts index df7f26d2e..bc0ef1bef 100644 --- a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/function.ts +++ b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/function.ts @@ -31,9 +31,9 @@ export type DatabaseProps = { }; /** - * Props for a Rust function without the package. + * Props for a Rust function which can be configured from the top-level orcabus context. */ -export type FunctionPropsNoPackage = { +export type FunctionPropsConfigurable = { /** * Additional build environment variables when building the Lambda function. */ @@ -57,9 +57,9 @@ export type FunctionPropsNoPackage = { }; /** - * Props for the Rust function. + * Props for the Rust function which can be configured from the top-level orcabus context. */ -export type FunctionProps = FunctionPropsNoPackage & +export type FunctionProps = FunctionPropsConfigurable & DatabaseProps & { /** * The package to build for this function. @@ -69,6 +69,10 @@ export type FunctionProps = FunctionPropsNoPackage & * Name of the Lambda function resource. */ readonly functionName?: string; + /** + * The timeout for the Lambda function, defaults to 28 seconds. + */ + readonly timeout?: Duration; }; /** @@ -121,7 +125,7 @@ export class Function extends Construct { }, }, memorySize: 128, - timeout: Duration.seconds(28), + timeout: props.timeout ?? Duration.seconds(28), environment: { // No password here, using RDS IAM to generate credentials. 
PGHOST: props.host, diff --git a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/ingest.ts b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/ingest.ts index 0c3628591..9ff5eece4 100644 --- a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/ingest.ts +++ b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/ingest.ts @@ -28,7 +28,7 @@ export type EventSourceProps = { /** * Props for the ingest function. */ -export type IngestFunctionProps = fn.FunctionPropsNoPackage & DatabaseProps & EventSourceProps; +export type IngestFunctionProps = fn.FunctionPropsConfigurable & DatabaseProps & EventSourceProps; /** * A construct for the Lambda ingest function. diff --git a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/inventory.ts b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/inventory.ts index a0eed6a43..8555800da 100644 --- a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/inventory.ts +++ b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/inventory.ts @@ -19,7 +19,7 @@ export type InventoryFunctionConfig = { /** * Props for the inventory function. 
*/ -export type InventoryFunctionProps = fn.FunctionPropsNoPackage & +export type InventoryFunctionProps = fn.FunctionPropsConfigurable & DatabaseProps & InventoryFunctionConfig; diff --git a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/migrate.ts b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/migrate.ts index 97abb6e0f..3556b0c20 100644 --- a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/migrate.ts +++ b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/migrate.ts @@ -1,17 +1,22 @@ import { Construct } from 'constructs'; import * as fn from './function'; import { DatabaseProps } from './function'; +import { Duration } from 'aws-cdk-lib'; /** * Props for the migrate function. */ -export type MigrateFunctionProps = fn.FunctionPropsNoPackage & DatabaseProps; +export type MigrateFunctionProps = fn.FunctionPropsConfigurable & DatabaseProps; /** * A construct for the Lambda migrate function. */ export class MigrateFunction extends fn.Function { constructor(scope: Construct, id: string, props: MigrateFunctionProps) { - super(scope, id, { package: 'filemanager-migrate-lambda', ...props }); + super(scope, id, { + package: 'filemanager-migrate-lambda', + timeout: Duration.minutes(2), + ...props, + }); } } From 903ff7bab271b3c9766e923f962c7d1fe60f432f Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Mon, 26 Aug 2024 15:22:16 +1000 Subject: [PATCH 20/20] fix(filemanager): check stack status to ensure that migration doesn't get stuck in UPDATE_ROLLBACK_FAILED --- .../stateless/stacks/filemanager/Cargo.lock | 25 +++ .../deploy/constructs/functions/migrate.ts | 11 +- .../filemanager-migrate-lambda/Cargo.toml | 3 + .../filemanager-migrate-lambda/src/main.rs | 163 +++++++----------- 4 files changed, 104 insertions(+), 98 deletions(-) diff --git a/lib/workload/stateless/stacks/filemanager/Cargo.lock b/lib/workload/stateless/stacks/filemanager/Cargo.lock index 7e307a632..456c3063e 100644 
--- a/lib/workload/stateless/stacks/filemanager/Cargo.lock +++ b/lib/workload/stateless/stacks/filemanager/Cargo.lock @@ -548,6 +548,30 @@ dependencies = [ "uuid", ] +[[package]] +name = "aws-sdk-cloudformation" +version = "1.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ace4f1ef88afc41ef46a3343ce9ac49f78e0d03dbf749ed27e9f032004f1581" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", +] + [[package]] name = "aws-sdk-s3" version = "1.29.0" @@ -2260,6 +2284,7 @@ dependencies = [ name = "filemanager-migrate-lambda" version = "0.1.0" dependencies = [ + "aws-sdk-cloudformation", "aws_lambda_events", "filemanager", "lambda_runtime", diff --git a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/migrate.ts b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/migrate.ts index 3556b0c20..ed3e42605 100644 --- a/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/migrate.ts +++ b/lib/workload/stateless/stacks/filemanager/deploy/constructs/functions/migrate.ts @@ -1,7 +1,8 @@ import { Construct } from 'constructs'; import * as fn from './function'; import { DatabaseProps } from './function'; -import { Duration } from 'aws-cdk-lib'; +import { Duration, Stack } from 'aws-cdk-lib'; +import { PolicyStatement } from 'aws-cdk-lib/aws-iam'; /** * Props for the migrate function. @@ -18,5 +19,13 @@ export class MigrateFunction extends fn.Function { timeout: Duration.minutes(2), ...props, }); + + // Need to be able to determine if the stack is in rollback state. 
+ this.addToPolicy( + new PolicyStatement({ + actions: ['cloudformation:DescribeStacks'], + resources: [Stack.of(this).stackId], + }) + ); } } diff --git a/lib/workload/stateless/stacks/filemanager/filemanager-migrate-lambda/Cargo.toml b/lib/workload/stateless/stacks/filemanager/filemanager-migrate-lambda/Cargo.toml index db96868ec..cb96d3188 100644 --- a/lib/workload/stateless/stacks/filemanager/filemanager-migrate-lambda/Cargo.toml +++ b/lib/workload/stateless/stacks/filemanager/filemanager-migrate-lambda/Cargo.toml @@ -8,10 +8,13 @@ rust-version.workspace = true [dependencies] serde = { version = "1", features = ["derive"] } +serde_json = "1" + tokio = { version = "1", features = ["macros"] } tracing = { version = "0.1" } aws_lambda_events = "0.15" +aws-sdk-cloudformation = "1" lambda_runtime = "0.13" filemanager = { path = "../filemanager", features = ["migrate"] } diff --git a/lib/workload/stateless/stacks/filemanager/filemanager-migrate-lambda/src/main.rs b/lib/workload/stateless/stacks/filemanager/filemanager-migrate-lambda/src/main.rs index cdf757396..c0db09df6 100644 --- a/lib/workload/stateless/stacks/filemanager/filemanager-migrate-lambda/src/main.rs +++ b/lib/workload/stateless/stacks/filemanager/filemanager-migrate-lambda/src/main.rs @@ -1,33 +1,15 @@ -use lambda_runtime::{run, service_fn, Error, LambdaEvent}; -use serde::de::IgnoredAny; -use serde::Deserialize; - -use crate::CloudFormationRequest::Delete; -use crate::Event::Provider; +use aws_lambda_events::cloudformation::provider::CloudFormationCustomResourceRequest; +use aws_sdk_cloudformation::types::StackStatus; +use aws_sdk_cloudformation::Client; +use filemanager::clients::aws::config; use filemanager::database::aws::migration::Migration; use filemanager::database::Client as DbClient; use filemanager::database::Migrate; use filemanager::env::Config; use filemanager::handlers::aws::{create_database_pool, update_credentials}; use filemanager::handlers::init_tracing; - -/// The lambda event for 
this function. This is normally a CloudFormationCustomResourceRequest. -/// If anything else is present, the migrate lambda will still attempt to perform a migration. -#[derive(Debug, Deserialize)] -#[serde(untagged)] -pub enum Event { - Provider(CloudFormationRequest), - Ignored(IgnoredAny), -} - -/// Deserialize only the Delete type because this is the only event with different behaviour. -/// Todo, replace with `provider::CloudFormationCustomResourceRequest` when it gets released: -/// https://github.com/awslabs/aws-lambda-rust-runtime/pull/846 -#[derive(Debug, Deserialize)] -#[serde(tag = "RequestType")] -pub enum CloudFormationRequest { - Delete, -} +use lambda_runtime::{run, service_fn, Error, LambdaEvent}; +use tracing::trace; #[tokio::main] async fn main() -> Result<(), Error> { @@ -35,84 +17,71 @@ async fn main() -> Result<(), Error> { let config = &Config::load()?; let options = &create_database_pool(config).await?; - run(service_fn(|event: LambdaEvent| async move { - update_credentials(options, config).await?; + let cfn_client = &Client::new(&config::Config::with_defaults().await.load()); - // Migrate depending on the type of lifecycle event using the CDK provider framework: - // https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.custom_resources-readme.html#provider-framework - // - // Note, we don't care what's contained within the event, as the action will always be - // to try and migrate unless this is a Delete event. - match event.payload { - // If it's a Delete there's no need to do anything. - Provider(Delete) => Ok(()), - _ => { - // If there's nothing to migrate, then this will just return Ok. 
- Ok::<_, Error>( - Migration::new(DbClient::new(options.clone())) - .migrate() - .await?, - ) - } - } - })) - .await -} + run(service_fn( + |event: LambdaEvent| async move { + update_credentials(options, config).await?; -#[cfg(test)] -mod tests { - use super::*; - use crate::CloudFormationRequest::Delete; - use crate::Event::Ignored; - use serde_json::{from_value, json}; + // Migrate depending on the type of lifecycle event using the CDK provider framework: + // https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.custom_resources-readme.html#provider-framework + match event.payload { + // Migrate normally if this resource is being created. + CloudFormationCustomResourceRequest::Create(create) => { + trace!(create = ?create, "during create"); - #[test] - fn event_deserialize_provider_delete() { - // From https://github.com/awslabs/aws-lambda-rust-runtime/blob/a68de584154958c524692cb43dc208d520d05a13/lambda-events/src/fixtures/example-cloudformation-custom-resource-provider-delete-request.json - let event = json!({ - "RequestType": "Delete", - "RequestId": "ef70561d-d4ba-42a4-801b-33ad88dafc37", - "StackId": "arn:aws:cloudformation:us-east-1:123456789012:stack/stack-name/16580499-7622-4a9c-b32f-4eba35da93da", - "ResourceType": "Custom::MyCustomResourceType", - "LogicalResourceId": "CustomResource", - "PhysicalResourceId": "custom-resource-f4bd5382-3de3-4caf-b7ad-1be06b899647", - "ResourceProperties": { - "Key1" : "string", - "Key2" : ["list"], - "Key3" : { "Key4": "map" } - } - }); + Ok::<_, Error>( + Migration::new(DbClient::new(options.clone())) + .migrate() + .await?, + ) + } + // If this is an update event, then we need to check if a rollback is in progress. + CloudFormationCustomResourceRequest::Update(update) => { + trace!(update = ?update, "during update"); - // A Provider lifecycle event should deserialize into the Provider enum. 
- assert!(matches!(from_value(event).unwrap(), Provider(Delete))); - } + // Find the state of the top-level stack which is being updated. This will + // contain a status indicating if this is the first update, or a rollback update. + let stack_state = cfn_client + .describe_stacks() + .stack_name(update.common.stack_id.as_str()) + .send() + .await? + .stacks + .and_then(|stacks| { + stacks.into_iter().find(|stack| { + stack.stack_id() == Some(update.common.stack_id.as_str()) + }) + }) + .and_then(|stack| stack.stack_status); - #[test] - fn event_deserialize_ignored_create() { - // From https://github.com/awslabs/aws-lambda-rust-runtime/blob/a68de584154958c524692cb43dc208d520d05a13/lambda-events/src/fixtures/example-cloudformation-custom-resource-provider-create-request.json - let event = json!({ - "RequestType": "Create", - "RequestId": "82304eb2-bdda-469f-a33b-a3f1406d0a52", - "StackId": "arn:aws:cloudformation:us-east-1:123456789012:stack/stack-name/16580499-7622-4a9c-b32f-4eba35da93da", - "ResourceType": "Custom::MyCustomResourceType", - "LogicalResourceId": "CustomResource", - "ResourceProperties": { - "Key1": "string", - "Key2": ["list"], - "Key3": { "Key4": "map" } - } - }); + // Only migrate when this is a normal update. + if let Some(ref status) = stack_state { + trace!(stack_state = ?stack_state); - // Any non-deleted cloud formation event data should be ignored. - assert!(matches!(from_value(event).unwrap(), Ignored(IgnoredAny))); - } + if let StackStatus::UpdateInProgress = status { + return Ok::<_, Error>( + Migration::new(DbClient::new(options.clone())) + .migrate() + .await?, + ); + } + } - #[test] - fn event_deserialize_ignored_empty() { - // Any other data should deserialize into the Ignored enum. 
- assert!(matches!( - from_value(json!({})).unwrap(), - Ignored(IgnoredAny) - )); - } + // If this was a rollback update, then no migration should be performed, + // because the previous update indicated a failed migration, and the migration + // would have already been rolled back. If a migration occurred here it would + // just fail again, resulting in an `UPDATE_ROLLBACK_FAILED`. + Ok(()) + } + // If this is a delete event, there is nothing to do. + CloudFormationCustomResourceRequest::Delete(delete) => { + trace!(delete = ?delete, "during delete"); + + Ok(()) + } + } + }, + )) + .await }