Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chore: Refactor Django orcabus_id #784

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 40 additions & 33 deletions lib/workload/stateless/stacks/metadata-manager/app/fields.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,55 @@
import hashlib

import ulid
from django.core.validators import RegexValidator
from django.db import models

# Character class of a ULID's text form: 26 characters drawn from digits and
# upper-case letters, excluding I, L, O and U. Kept unanchored so it can still
# be embedded in larger patterns.
ULID_REGEX_STR = r"[0123456789ABCDEFGHJKMNPQRSTVWXYZ]{26}"

# RegexValidator matches with `re.search`, so the pattern is anchored here:
# without anchors any longer string that merely *contains* 26 valid characters
# would pass. `\Z` (rather than `$`) also rejects a trailing newline.
ulid_validator = RegexValidator(
    regex=rf"^{ULID_REGEX_STR}\Z",
    message='ULID is expected to be 26 characters long',
    code='invalid_orcabus_id',
)


def get_ulid() -> str:
    """Return a freshly generated ULID in its string form."""
    fresh_id = ulid.new()
    return fresh_id.str

class HashField(models.CharField):
description = (
"HashField is related to some base fields (other columns) in a model and"
"stores its hashed value for better indexing performance."
)

def __init__(self, base_fields, *args, **kwargs):
"""
:param base_fields: name of fields storing the value to be hashed
"""
self.base_fields = base_fields
kwargs["max_length"] = 64
super(HashField, self).__init__(*args, **kwargs)
class UlidField(models.CharField):
description = "An OrcaBus internal ID (ULID)"

def __init__(self, *args, **kwargs):
    """Build the field with its ULID-specific options forced.

    ``max_length``, ``validators`` and ``default`` are always overwritten,
    so any value a caller passes for them is ignored.
    """
    kwargs.update(
        max_length=26,  # ULID length
        validators=[ulid_validator],
        default=get_ulid,
    )
    super().__init__(*args, **kwargs)

def deconstruct(self):
name, path, args, kwargs = super().deconstruct()
del kwargs["max_length"]
if self.base_fields is not None:
kwargs["base_fields"] = self.base_fields
del kwargs['validators']
del kwargs['default']
return name, path, args, kwargs

def pre_save(self, instance, add):
self.calculate_hash(instance)
return super(HashField, self).pre_save(instance, add)

def calculate_hash(self, instance):
sha256 = hashlib.sha256()
for field in self.base_fields:
value = getattr(instance, field)
sha256.update(value.encode("utf-8"))
setattr(instance, self.attname, sha256.hexdigest())
class OrcaBusIdField(UlidField):
description = "An OrcaBus internal ID (based on ULID)"

def __init__(self, prefix='', *args, **kwargs):
    """Create the field, remembering the prefix used to annotate its values.

    :param prefix: short entity tag (e.g. ``'lib'``) prepended as
        ``"<prefix>.<ulid>"`` when a value is read back from the database;
        an empty string disables the annotation.

    NOTE: ``prefix`` is the first positional parameter, so pass every other
    field option by keyword.
    """
    # Set before delegating so the attribute exists for the whole of the
    # parent initialisation.
    self.prefix = prefix
    super().__init__(*args, **kwargs)

@property
def non_db_attrs(self):
    """Extend the parent's ``non_db_attrs`` with ``"prefix"``."""
    inherited_attrs = super().non_db_attrs
    return inherited_attrs + ("prefix",)

class HashFieldHelper(object):
def __init__(self):
self.__sha256 = hashlib.sha256()
def from_db_value(self, value, expression, connection):
    """Annotate a stored value with this field's prefix.

    Returns ``"<prefix>.<value>"`` when both the value and the prefix are
    non-empty; otherwise the value is returned unchanged.
    """
    # Nothing to annotate: empty/NULL value, or the field has no prefix.
    if not value or self.prefix == '':
        return value
    return f"{self.prefix}.{value}"

def add(self, value):
self.__sha256.update(value.encode("utf-8"))
return self
def to_python(self, value):
    """Normalise a Python-side value to the form that is validated and stored.

    Django calls this while cleaning the field (e.g. from ``full_clean()``)
    and when deserialising. The value is reduced to the bare ULID through
    ``get_prep_value`` so validators never see the prefix.

    :param value: an id with or without its prefix, or ``None``
    :return: the value with any prefix stripped; ``None`` is passed through
    """
    # None cannot be sliced; hand it back so the null/blank checks report it
    # instead of a TypeError.
    if value is None:
        return value
    return self.get_prep_value(value)

def calculate_hash(self):
return self.__sha256.hexdigest()
def get_prep_value(self, value):
    """Strip any prefix so only the bare ULID is sent to the database.

    :param value: an id with or without its prefix, or ``None``
    :return: the last 26 characters of ``value``; ``None`` is passed through
    """
    # Django passes None here for lookups such as `filter(orcabus_id=None)`
    # and for empty nullable relations; slicing it would raise a TypeError.
    if value is None:
        return value
    # We just want the last 26 characters which is the ULID (ignoring any prefix) when dealing with the database
    return value[-26:]
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Generated by Django 5.1.4 on 2024-12-17 01:44

import app.fields
from django.db import migrations


class Migration(migrations.Migration):
    """Alter every ``orcabus_id`` column to ``OrcaBusIdField``.

    Live models get the field as primary key; their ``historical*``
    counterparts get it as an indexed column.
    """

    dependencies = [
        ('app', '0002_remove_historicalcontact_history_user_and_more'),
    ]

    # One AlterField per model, in the order the migration was generated.
    # Each row is (model name, keyword options for OrcaBusIdField).
    operations = [
        migrations.AlterField(
            model_name=model_name,
            name='orcabus_id',
            field=app.fields.OrcaBusIdField(**field_options),
        )
        for model_name, field_options in (
            ('contact', {'primary_key': True, 'serialize': False}),
            ('historicalcontact', {'db_index': True}),
            ('historicalindividual', {'db_index': True}),
            ('historicallibrary', {'db_index': True}),
            ('historicalproject', {'db_index': True}),
            ('historicalsample', {'db_index': True}),
            ('historicalsubject', {'db_index': True}),
            ('individual', {'primary_key': True, 'serialize': False}),
            ('library', {'primary_key': True, 'serialize': False}),
            ('project', {'primary_key': True, 'serialize': False}),
            ('sample', {'primary_key': True, 'serialize': False}),
            ('subject', {'primary_key': True, 'serialize': False}),
        )
    ]
31 changes: 11 additions & 20 deletions lib/workload/stateless/stacks/metadata-manager/app/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from simple_history.models import HistoricalRecords

from rest_framework.settings import api_settings

from app.pagination import PaginationConstant

logger = logging.getLogger(__name__)
Expand All @@ -42,7 +43,9 @@ def reduce_multi_values_qor(key: str, values: List[str]):
):
values = [values]
return reduce(
operator.or_, (Q(**{"%s__iexact" % key: value})
# Apparently, with `__iexact` the custom field's `get_prep_value` (see fields.py) is not called before the
# query hits the DB, whereas a regular `__exact` lookup still runs it.
operator.or_, (Q(**{"%s__exact" % key: value})
Copy link
Member Author

@williamputraintan williamputraintan Dec 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One caveat I found here: the prefix strip/annotation does not work when the lookup uses `iexact` (as in our model-based custom filtering). For now, I am changing it to filter on exact matches only.

for value in values)
)

Expand Down Expand Up @@ -102,7 +105,6 @@ def update_or_create_if_needed(self, search_key: dict, data: dict, user_id: str
"""
is_created = False
is_updated = False

try:
obj = self.get(**search_key)
for key, value in data.items():
Expand All @@ -125,27 +127,16 @@ class BaseModel(models.Model):
class Meta:
abstract = True

orcabus_id = models.CharField(
primary_key=True,
unique=True,
editable=False,
blank=False,
null=False,
validators=[
RegexValidator(
regex=r'[\w]{26}$',
message='ULID is expected to be 26 characters long',
code='invalid_orcabus_id'
)]

)

def save(self, *args, **kwargs):
if not self.orcabus_id:
self.orcabus_id = ulid.new().str
# To make django validate the constraint before saving it
self.full_clean()

return super(BaseModel, self).save(*args, **kwargs)
super(BaseModel, self).save(*args, **kwargs)

# Reload the object from the database to ensure custom fields like OrcaBusIdField
# invoke the `from_db_value` method (which provides the annotation) after saving.
self.refresh_from_db()


@classmethod
def get_fields(cls):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from django.db import models

from app.fields import OrcaBusIdField
from app.models.base import BaseModel, BaseManager, BaseHistoricalRecords


class ContactManager(BaseManager):
pass


class Contact(BaseModel):
orcabus_id_prefix = 'ctc.'
objects = ContactManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='ctc')
contact_id = models.CharField(
unique=True,
blank=True,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models

from app.fields import OrcaBusIdField
from app.models.base import BaseModel, BaseManager, BaseHistoricalRecords


Expand All @@ -8,9 +9,9 @@ class IndividualManager(BaseManager):


class Individual(BaseModel):
orcabus_id_prefix = 'idv.'
objects = IndividualManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='idv')
individual_id = models.CharField(
unique=True,
blank=True,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from django.db import models

from app.fields import OrcaBusIdField
from app.models.base import BaseManager, BaseModel, BaseHistoricalRecords
from app.models.subject import Subject
from app.models.sample import Sample
Expand Down Expand Up @@ -64,9 +65,9 @@ class LibraryProjectLink(models.Model):


class Library(BaseModel):
orcabus_id_prefix = 'lib.'
objects = LibraryManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='lib')
library_id = models.CharField(
unique=True,
blank=True,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models

from app.fields import OrcaBusIdField
from app.models.contact import Contact
from app.models.base import BaseModel, BaseManager, BaseHistoricalRecords

Expand All @@ -19,9 +20,9 @@ class ProjectContactLink(models.Model):


class Project(BaseModel):
orcabus_id_prefix = 'prj.'
objects = ProjectManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='prj')
project_id = models.CharField(
unique=True,
blank=True,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import ulid
from django.db import models

from app.fields import OrcaBusIdField
from app.models.base import BaseModel, BaseManager, BaseHistoricalRecords


Expand Down Expand Up @@ -31,9 +32,9 @@ class SampleManager(BaseManager):


class Sample(BaseModel):
orcabus_id_prefix = 'smp.'
objects = SampleManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='smp')
sample_id = models.CharField(
unique=True,
blank=True,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models

from app.fields import OrcaBusIdField
from app.models.base import BaseModel, BaseManager, BaseHistoricalRecords


Expand All @@ -17,8 +18,9 @@ class SubjectIndividualLink(models.Model):


class Subject(BaseModel):
orcabus_id_prefix = 'sbj.'
objects = SubjectManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='sbj')
subject_id = models.CharField(
unique=True,
blank=True,
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,22 +1,14 @@
from abc import ABC

from rest_framework import serializers

from rest_framework.serializers import ModelSerializer
from app.models import Contact
from .base import SerializersBase


class ContactBaseSerializer(SerializersBase):
prefix = Contact.orcabus_id_prefix


class ContactSerializer(ContactBaseSerializer):
class ContactSerializer(ModelSerializer):
class Meta:
model = Contact
fields = "__all__"


class ContactDetailSerializer(ContactBaseSerializer):
class ContactDetailSerializer(ModelSerializer):
from .project import ProjectSerializer

project_set = ProjectSerializer(many=True, read_only=True)
Expand All @@ -26,7 +18,7 @@ class Meta:
fields = "__all__"


class ContactHistorySerializer(ContactBaseSerializer):
class ContactHistorySerializer(ModelSerializer):
class Meta:
model = Contact.history.model
fields = "__all__"
Loading
Loading