Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat (MM): History API for Models #606

Merged
merged 5 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion lib/workload/stateless/stacks/metadata-manager/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,15 @@ makemigrations:
# Apply database migrations through Django's `migrate` management command.
migrate:
@python manage.py migrate

insert-data:
# Load mock data for local development: runs the `reset-db` and `migrate`
# prerequisites first, then the `insert_mock_data` management command.
mock: reset-db migrate
@python manage.py insert_mock_data

# Run the Django test suite with `--parallel`. This target does not bring the
# compose stack up or down itself; use `test` for the full pipeline.
suite:
@python manage.py test --parallel

# Run the Django test suite under coverage.py, then print the coverage report
# (`-m` also lists the line numbers that were not executed).
coverage:
@coverage run manage.py test
@coverage report -m

# full mock suite test pipeline - install deps, bring up compose stack, run suite, bring down compose stack
test: install up suite down
6 changes: 6 additions & 0 deletions lib/workload/stateless/stacks/metadata-manager/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,12 @@ To stop the running server, simply use the `make stop` command
To run the tests from scratch, use `make test`. If you want to test against an already-running database, use `make suite` instead.

Coverage test

```bash
make coverage
```

### Development

#### Migrations
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from django.core.management import BaseCommand

from app.tests.utils import clear_all_data


# https://docs.djangoproject.com/en/5.0/howto/custom-management-commands/
class Command(BaseCommand):
    """Management command that deletes all DB data by delegating to ``clear_all_data``."""

    help = "Delete all DB data"

    def handle(self, *args, **options):
        """Clear all data, then report completion on the command's stdout.

        Positional and keyword options are accepted for the ``BaseCommand``
        interface but are not used.
        """
        clear_all_data()

        # Write through self.stdout rather than print() so that
        # call_command(..., stdout=...) and tests can capture the output.
        self.stdout.write("Done")
Original file line number Diff line number Diff line change
@@ -1,22 +1,166 @@
import json
import pandas as pd
from django.core.exceptions import ObjectDoesNotExist
from django.core.management import BaseCommand

from app.models import Subject, Library, Sample, Individual, Project, Contact
from app.tests.utils import clear_all_data
from proc.service.tracking_sheet_srv import sanitize_lab_metadata_df, persist_lab_metadata
from proc.tests.test_tracking_sheet_srv import RECORD_1, RECORD_2, RECORD_3, SHEET_YEAR


class Command(BaseCommand):
    """Reset the database content and load mock metadata.

    Usage:
        python manage.py insert_mock_data
    """

    help = "Generate mock Metadata into database for local development and testing"

    def handle(self, *args, **options):
        """Clear existing data, then load the mock libraries via ``load_mock_from_wfm``.

        Positional and keyword options are accepted for the ``BaseCommand``
        interface but are not used.
        """
        # Write through self.stdout rather than print() so that
        # call_command(..., stdout=...) and tests can capture the output.
        self.stdout.write("insert data from proc service test")
        clear_all_data()
        load_mock_from_wfm()

mock_sheet_data = [RECORD_1, RECORD_2, RECORD_3]

metadata_pd = pd.json_normalize(mock_sheet_data)
metadata_pd = sanitize_lab_metadata_df(metadata_pd)
result = persist_lab_metadata(metadata_pd, SHEET_YEAR)
def load_mock_from_proc():
    """Load the proc-service test records (RECORD_1..RECORD_3) into the database.

    Not in use for now, as loading data from WFM is preferred to keep data in sync.

    The records are normalised into a DataFrame, sanitised with
    ``sanitize_lab_metadata_df`` and persisted with ``persist_lab_metadata``
    for ``SHEET_YEAR``. The persistence result is printed as indented JSON.
    """
    mock_sheet_data = [RECORD_1, RECORD_2, RECORD_3]

    metadata_pd = pd.json_normalize(mock_sheet_data)
    metadata_pd = sanitize_lab_metadata_df(metadata_pd)
    # is_emit_eb_events=False: presumably suppresses event emission while
    # seeding mock data -- confirm against persist_lab_metadata.
    result = persist_lab_metadata(metadata_pd, SHEET_YEAR, is_emit_eb_events=False)

    # Report only once `result` exists; printing it before the persist call
    # would raise UnboundLocalError.
    print(json.dumps(result, indent=4))
    print("insert mock data completed")


def _link_if_missing(related_manager, obj):
    """Add ``obj`` through ``related_manager`` unless it is already linked.

    The lookup is by ``orcabus_id``; ``add`` is only called when the lookup
    raises ``ObjectDoesNotExist``.
    """
    try:
        related_manager.get(orcabus_id=obj.orcabus_id)
    except ObjectDoesNotExist:
        related_manager.add(obj)


def load_mock_from_wfm():
    """Upsert a fixed set of mock libraries and their related records.

    For each hard-coded library a Subject and a Sample are upserted and the
    Library is linked to them. All libraries share one Individual
    (``IDV0001``), one Contact (``ctc-1``) and one Project (``prj-1``).
    """
    # The libraries are taken from WFM as of 16/10/2024
    # Will sync this so test data sync across MM <=> WFM
    #
    # NOTE (from PR review; no action needed for the current O3 iteration):
    # this should be reorganised as centralised seed data (in JSON or YAML
    # format) somewhere in docs or a shared directory down the track. It will
    # get complex once scenarios and more services (Case Manager, etc.) are
    # included in the role play.
    libraries = [
        {
            "orcabus_id": "01J5M2JFE1JPYV62RYQEG99CP1",
            "phenotype": "tumor",
            "library_id": "L000001",
            "assay": "TsqNano",
            "type": "WGS",
            "subject": "SBJ00001",
            "workflow": "clinical"
        },
        {
            "orcabus_id": "02J5M2JFE1JPYV62RYQEG99CP2",
            "phenotype": "normal",
            "library_id": "L000002",
            "assay": "TsqNano",
            "type": "WGS",
            "subject": "SBJ00001",
            "workflow": "clinical"
        },
        {
            "orcabus_id": "03J5M2JFE1JPYV62RYQEG99CP3",
            "phenotype": "tumor",
            "library_id": "L000003",
            "assay": "TsqNano",
            "type": "WGS",
            "subject": "SBJ00002",
            "workflow": "research"
        },
        {
            "orcabus_id": "04J5M2JFE1JPYV62RYQEG99CP4",
            "phenotype": "normal",
            "library_id": "L000004",
            "assay": "TsqNano",
            "type": "WGS",
            "subject": "SBJ00002",
            "workflow": "research"
        },
        {
            "orcabus_id": "05J5M2JFE1JPYV62RYQEG99CP5",
            "phenotype": "tumor",
            "library_id": "L000005",
            "assay": "ctTSOv2",
            "type": "ctDNA",
            "subject": "SBJ00003",
            "workflow": "clinical"
        },
        {
            "orcabus_id": "06J5M2JFE1JPYV62RYQEG99CP6",
            "phenotype": "tumor",
            "library_id": "L000006",
            "assay": "ctTSOv2",
            "type": "ctDNA",
            "subject": "SBJ00003",
            "workflow": "research"
        },
    ]

    # The individual, contact and project are identical for every library, so
    # they are upserted (and linked) once instead of once per loop iteration.
    # update_or_create_if_needed returns (object, is_created, is_updated);
    # only the object is needed here.
    idv, _, _ = Individual.objects.update_or_create_if_needed(
        search_key={
            "individual_id": 'IDV0001',
            "source": "lab"
        },
        data={
            "individual_id": 'IDV0001',
            "source": "lab"
        }
    )

    contact, _, _ = Contact.objects.update_or_create_if_needed(
        search_key={"contact_id": 'ctc-1'},
        data={
            "contact_id": 'ctc-1',
        }
    )

    project, _, _ = Project.objects.update_or_create_if_needed(
        search_key={"project_id": 'prj-1'},
        data={
            "project_id": 'prj-1',
        }
    )

    _link_if_missing(project.contact_set, contact)

    for lib in libraries:
        subject, _, _ = Subject.objects.update_or_create_if_needed(
            search_key={"subject_id": lib["subject"]},
            data={
                "subject_id": lib["subject"],
            }
        )
        _link_if_missing(subject.individual_set, idv)

        # Sample identifiers are derived from the library id.
        sample, _, _ = Sample.objects.update_or_create_if_needed(
            search_key={"sample_id": f"""smp-{lib["library_id"]}"""},
            data={
                "sample_id": f"""smp-{lib["library_id"]}""",
                "external_sample_id": f"""ext-smp-{lib["library_id"]}""",
                "source": "blood",
            }
        )

        library, _, _ = Library.objects.update_or_create_if_needed(
            search_key={'library_id': lib["library_id"]},
            data={
                "orcabus_id": lib["orcabus_id"],
                "library_id": lib["library_id"],
                "phenotype": lib["phenotype"],
                "assay": lib["assay"],
                "type": lib["type"],
                "workflow": lib["workflow"],

                "subject_id": subject.orcabus_id,
                "sample_id": sample.orcabus_id,
            }
        )
        _link_if_missing(library.project_set, project)
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from abc import ABC

from rest_framework import serializers

from app.models import Contact
from .base import SerializersBase

Expand All @@ -7,7 +11,6 @@ class ContactBaseSerializer(SerializersBase):


class ContactSerializer(ContactBaseSerializer):

class Meta:
model = Contact
fields = "__all__"
Expand All @@ -22,3 +25,8 @@ class Meta:
model = Contact
fields = "__all__"


class ContactHistorySerializer(ContactBaseSerializer):
    """Serializer for the model behind ``Contact.history``, exposing all of its fields."""

    class Meta:
        fields = "__all__"
        model = Contact.history.model
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,8 @@ class IndividualDetailSerializer(IndividualSerializer):

subject_set = SubjectSerializer(many=True, read_only=True)


class IndividualHistorySerializer(IndividualSerializer):
    """Serializer for the model behind ``Individual.history``, exposing all of its fields."""

    class Meta:
        fields = "__all__"
        model = Individual.history.model
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from app.models import Library, Sample, Subject
from abc import ABC

from rest_framework import serializers

from app.models import Library, Sample, Subject, Project
from .base import SerializersBase


Expand All @@ -13,8 +17,11 @@ class Meta:

def to_representation(self, instance):
    """Return the parent representation with prefixed ``sample`` / ``subject`` ids.

    ``Sample.orcabus_id_prefix`` and ``Subject.orcabus_id_prefix`` are
    prepended only when the corresponding value is present and truthy.
    """
    representation = super().to_representation(instance)

    # Guarded prefixing only: an unconditional `prefix + value` raises
    # TypeError when the value is None, and prefixing twice would corrupt
    # the id.
    if representation.get('sample', None):
        representation['sample'] = Sample.orcabus_id_prefix + representation['sample']
    if representation.get('subject', None):
        representation['subject'] = Subject.orcabus_id_prefix + representation['subject']
    return representation


Expand All @@ -31,3 +38,18 @@ class LibraryDetailSerializer(LibraryBaseSerializer):
class Meta:
model = Library
fields = "__all__"


class LibraryHistorySerializer(LibrarySerializer):
    """Serializer for the model behind ``Library.history``, exposing all of its fields.

    ``project_set`` is rendered as a list of prefixed project orcabus ids.
    """

    class ProjectOrcabusIdSet(serializers.RelatedField):
        """Related field that renders each entry as a prefixed project orcabus id."""

        def to_representation(self, value):
            # `value` is expected to expose the linked project as `.project`.
            prefix = Project.orcabus_id_prefix
            return prefix + value.project.orcabus_id

        def to_internal_value(self, data):
            # Input is not supported; the field is declared read-only below.
            raise NotImplementedError()

    project_set = ProjectOrcabusIdSet(many=True, read_only=True)

    class Meta:
        fields = "__all__"
        model = Library.history.model
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from rest_framework import serializers

from .base import SerializersBase
from app.models import Project
from app.models import Project, Contact


class ProjectBaseSerializer(SerializersBase):
Expand All @@ -22,3 +24,20 @@ class ProjectDetailSerializer(ProjectBaseSerializer):
class Meta:
model = Project
fields = "__all__"


class ProjectHistorySerializer(ProjectBaseSerializer):
    """Serializer for the model behind ``Project.history``, exposing all of its fields.

    ``contact_set`` is rendered as a list of prefixed contact orcabus ids.
    """

    class ContactOrcabusIdSet(serializers.RelatedField):
        """Related field that renders each entry as a prefixed contact orcabus id."""

        def to_representation(self, value):
            # `value` is expected to expose the linked contact as `.contact`.
            prefix = Contact.orcabus_id_prefix
            return prefix + value.contact.orcabus_id

        def to_internal_value(self, data):
            # Input is not supported; the field is declared read-only below.
            raise NotImplementedError()

    contact_set = ContactOrcabusIdSet(many=True, read_only=True)

    class Meta:
        fields = "__all__"
        model = Project.history.model

Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,10 @@ class Meta:
fields = '__all__'

library_set = LibrarySerializer(many=True, read_only=True)


class SampleHistorySerializer(SampleBaseSerializer):
    """Serializer for the model behind ``Sample.history``, exposing all of its fields."""

    class Meta:
        fields = "__all__"
        model = Sample.history.model
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from app.models import Subject
from rest_framework import serializers

from app.models import Subject, Individual
from .base import SerializersBase


Expand All @@ -24,3 +26,18 @@ class Meta:

individual_set = IndividualSerializer(many=True, read_only=True)
library_set = LibrarySerializer(many=True, read_only=True)


class SubjectHistorySerializer(SubjectBaseSerializer):
    """Serializer for the model behind ``Subject.history``, exposing all of its fields.

    ``individual_set`` is rendered as a list of prefixed individual orcabus ids.
    """

    class IndividualOrcabusIdSet(serializers.RelatedField):
        """Related field that renders each entry as a prefixed individual orcabus id."""

        def to_representation(self, value):
            # `value` is expected to expose the linked individual as `.individual`.
            prefix = Individual.orcabus_id_prefix
            return prefix + value.individual.orcabus_id

        def to_internal_value(self, data):
            # Input is not supported; the field is declared read-only below.
            raise NotImplementedError()

    individual_set = IndividualOrcabusIdSet(many=True, read_only=True)

    class Meta:
        fields = "__all__"
        model = Subject.history.model
Loading