Skip to content

Commit

Permalink
wfm: OrcaBusId Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
williamputraintan committed Dec 17, 2024
1 parent b8f1a28 commit eafc802
Show file tree
Hide file tree
Showing 35 changed files with 182 additions and 207 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def non_db_attrs(self):
return super().non_db_attrs + ("prefix",)

def from_db_value(self, value, expression, connection):
# print('hello from_db_value', value)
if value and self.prefix != '':
return f"{self.prefix}.{value}"
else:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,70 +1,55 @@
import hashlib
import ulid

from django.db import models
from django.core.validators import RegexValidator
from django.db import models

orcabus_id_validator = RegexValidator(
regex=r'[\w]{26}$',
message='ULID is expected to be 26 characters long',
code='invalid_orcabus_id'
)

# Character class of the 26-character ULID text form (digits and upper-case
# letters, without I, L, O and U). Kept unanchored in case it is embedded in a
# larger pattern elsewhere.
ULID_REGEX_STR = r"[0123456789ABCDEFGHJKMNPQRSTVWXYZ]{26}"

# RegexValidator matches with `search()`, so the pattern has to be anchored
# explicitly; otherwise any value that merely *contains* 26 valid characters
# would pass validation.
ulid_validator = RegexValidator(regex=rf"\A{ULID_REGEX_STR}\Z",
                                message='ULID is expected to be 26 characters long',
                                code='invalid_orcabus_id')

class OrcabusIdField(models.CharField):
description = "An OrcaBus internal ID (ULID)"

def __init__(self, prefix, *args, **kwargs):
kwargs["max_length"] = 26 # ULID length
kwargs['unique'] = True
kwargs['editable'] = False
kwargs['blank'] = False
kwargs['null'] = False
kwargs['default'] = ulid.new
kwargs['validators'] = [orcabus_id_validator]
super().__init__(*args, **kwargs)
def get_ulid() -> str:
    """Generate a fresh ULID and return its string representation."""
    new_id = ulid.new()
    return new_id.str


class HashField(models.CharField):
description = (
"HashField is related to some base fields (other columns) in a model and"
"stores its hashed value for better indexing performance."
)
class UlidField(models.CharField):
description = "An OrcaBus internal ID (ULID)"

def __init__(self, base_fields, *args, **kwargs):
"""
:param base_fields: name of fields storing the value to be hashed
"""
self.base_fields = base_fields
kwargs["max_length"] = 64
super(HashField, self).__init__(*args, **kwargs)
def __init__(self, *args, **kwargs):
    """
    Build the underlying CharField with the ULID-specific settings.

    ``max_length``, ``validators`` and ``default`` are always overwritten,
    whatever the caller passed for them.
    """
    kwargs.update(
        max_length=26,  # ULID length
        validators=[ulid_validator],
        default=get_ulid,
    )
    super().__init__(*args, **kwargs)

def deconstruct(self):
name, path, args, kwargs = super().deconstruct()
del kwargs["max_length"]
if self.base_fields is not None:
kwargs["base_fields"] = self.base_fields
del kwargs['validators']
del kwargs['default']
return name, path, args, kwargs

def pre_save(self, instance, add):
self.calculate_hash(instance)
return super(HashField, self).pre_save(instance, add)

def calculate_hash(self, instance):
sha256 = hashlib.sha256()
for field in self.base_fields:
value = getattr(instance, field)
sha256.update(value.encode("utf-8"))
setattr(instance, self.attname, sha256.hexdigest())
class OrcaBusIdField(UlidField):
description = "An OrcaBus internal ID (based on ULID)"

def __init__(self, prefix='', *args, **kwargs):
    """
    :param prefix: short label stored on the field; `from_db_value` prepends it
        (followed by a dot) to values read from the database. The default empty
        string means values are returned without any prefix.
    """
    self.prefix = prefix
    super().__init__(*args, **kwargs)

@property
def non_db_attrs(self):
    # Extend the parent's tuple with "prefix": the prefix is only added when
    # values are read back (see from_db_value) and is stripped before writing
    # (see get_prep_value), so it is declared as a non-database attribute.
    return super().non_db_attrs + ("prefix",)

class HashFieldHelper(object):
def __init__(self):
self.__sha256 = hashlib.sha256()
def from_db_value(self, value, expression, connection):
    """
    Annotate a value loaded from the database with the field's prefix.

    Empty/None values, and fields configured without a prefix, are returned
    untouched; otherwise the result is ``"<prefix>.<value>"``.
    """
    if not value or self.prefix == '':
        return value
    return f"{self.prefix}.{value}"

def add(self, value):
self.__sha256.update(value.encode("utf-8"))
return self
def to_python(self, value):
    # Django calls to_python() during deserialization and from the field's
    # clean() (e.g. via Model.full_clean()). Delegate to get_prep_value() so the
    # value is normalised exactly as it is when prepared for the database.
    return self.get_prep_value(value)

def calculate_hash(self):
return self.__sha256.hexdigest()
def get_prep_value(self, value):
    """
    Reduce a (possibly prefixed) OrcaBus ID to the bare ULID used in the database.

    :param value: an ID such as ``"wfr.<ULID>"`` or ``"<ULID>"``; may be None
    :return: the last 26 characters of the value (the ULID part), or None
        when no value was given
    """
    # None must pass through untouched; slicing it would raise a TypeError.
    if value is None:
        return None
    # Coerce non-string input to str before slicing, as CharField does.
    # Only the last 26 characters (the ULID) are kept; any prefix is dropped.
    return str(value)[-26:]
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Generated by Django 5.1.2 on 2024-12-17 08:04

import workflow_manager.fields
from django.db import migrations


class Migration(migrations.Migration):
    """Alter the `orcabus_id` primary key of every listed model to `OrcaBusIdField`."""

    dependencies = [
        ('workflow_manager', '0002_workflowruncomment'),
    ]

    # One AlterField per model, in the same order as originally generated.
    operations = [
        migrations.AlterField(
            model_name=model_name,
            name='orcabus_id',
            field=workflow_manager.fields.OrcaBusIdField(primary_key=True, serialize=False),
        )
        for model_name in (
            'analysis',
            'analysiscontext',
            'analysisrun',
            'library',
            'libraryassociation',
            'payload',
            'state',
            'workflow',
            'workflowrun',
            'workflowruncomment',
        )
    ]
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models

from workflow_manager.fields import OrcaBusIdField
from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager
from workflow_manager.models.analysis_context import AnalysisContext
from workflow_manager.models.workflow import Workflow
Expand All @@ -13,8 +14,7 @@ class Analysis(OrcaBusBaseModel):
class Meta:
unique_together = ["analysis_name", "analysis_version"]

orcabus_id_prefix = 'ana.'

orcabus_id = OrcaBusIdField(primary_key=True, prefix='ana')
analysis_name = models.CharField(max_length=255)
analysis_version = models.CharField(max_length=255)
description = models.CharField(max_length=255)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models

from workflow_manager.fields import OrcaBusIdField
from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager


Expand All @@ -11,8 +12,7 @@ class AnalysisContext(OrcaBusBaseModel):
class Meta:
unique_together = ["name", "usecase"]

orcabus_id_prefix = 'ctx.'

orcabus_id = OrcaBusIdField(primary_key=True, prefix='ctx')
name = models.CharField(max_length=255)
usecase = models.CharField(max_length=255)
description = models.CharField(max_length=255)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models

from workflow_manager.fields import OrcaBusIdField
from workflow_manager.models.analysis import Analysis
from workflow_manager.models.analysis_context import AnalysisContext
from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager
Expand All @@ -11,8 +12,8 @@ class AnalysisRunManager(OrcaBusBaseManager):


class AnalysisRun(OrcaBusBaseModel):
orcabus_id_prefix = 'anr.'

orcabus_id = OrcaBusIdField(primary_key=True, prefix='anr')
analysis_run_name = models.CharField(max_length=255)
comment = models.CharField(max_length=255, null=True, blank=True)
status = models.CharField(max_length=255, null=True, blank=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,11 @@
ManyToOneRel, ManyToManyRel, OneToOneRel, QuerySet
from rest_framework.settings import api_settings

from workflow_manager.fields import OrcaBusIdField
from workflow_manager.pagination import PaginationConstant

logger = logging.getLogger(__name__)

orcabus_id_validator = RegexValidator(
regex=r'^[\w]{26}$',
message='ULID is expected to be 26 characters long',
code='invalid_orcabus_id'
)

class OrcaBusBaseManager(models.Manager):

Expand All @@ -36,7 +32,9 @@ def reduce_multi_values_qor(key: str, values: List[str]):
):
values = [values]
return reduce(
operator.or_, (Q(**{"%s__iexact" % key: value})
# Apparently the custom field's `get_prep_value` (see fields.py) is not called before hitting the DB
# for `__iexact` lookups, whereas the regular `__exact` lookup still calls it.
operator.or_, (Q(**{"%s__exact" % key: value})
for value in values)
)

Expand Down Expand Up @@ -80,32 +78,17 @@ class OrcaBusBaseModel(models.Model):
class Meta:
abstract = True

orcabus_id_prefix = None
orcabus_prefix = ''

orcabus_id = models.CharField(
primary_key=True,
unique=True,
editable=False,
blank=False,
null=False,
validators=[orcabus_id_validator]
)
orcabus_id = OrcaBusIdField(primary_key=True, prefix=orcabus_prefix)

def save(self, *args, **kwargs):
# handle the OrcaBus ID
if not self.orcabus_id:
# if no OrcaBus ID was provided, then generate one
self.orcabus_id = ulid.new().str
else:
# check provided OrcaBus ID
if len(self.orcabus_id) > 26:
# assume the OrcaBus ID carries the prefix
# we strip it off and continue to the validation
l = len(self.orcabus_id_prefix)
self.orcabus_id = str(self.orcabus_id)[l:]
self.full_clean() # make sure we are validating the inputs (especially the OrcaBus ID)
return super(OrcaBusBaseModel, self).save(*args, **kwargs)
super(OrcaBusBaseModel, self).save(*args, **kwargs)

# Reload the object from the database to ensure custom fields like OrcaBusIdField
# invoke the `from_db_value` method (which provides the annotation) after saving.
self.refresh_from_db()

@classmethod
def get_fields(cls):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.core.serializers.json import DjangoJSONEncoder
from django.db import models

from workflow_manager.fields import OrcaBusIdField
from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager


Expand All @@ -10,8 +11,7 @@ class LibraryManager(OrcaBusBaseManager):

class Library(OrcaBusBaseModel):

orcabus_id_prefix = "lib."

orcabus_id = OrcaBusIdField(primary_key=True, prefix='lib')
library_id = models.CharField(max_length=255)

objects = LibraryManager()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.core.serializers.json import DjangoJSONEncoder
from django.db import models

from workflow_manager.fields import OrcaBusIdField
from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager


Expand All @@ -9,8 +10,7 @@ class PayloadManager(OrcaBusBaseManager):


class Payload(OrcaBusBaseModel):
orcabus_id_prefix = 'pld.'

orcabus_id = OrcaBusIdField(primary_key=True, prefix='pld')
payload_ref_id = models.CharField(max_length=255, unique=True)
version = models.CharField(max_length=255)
data = models.JSONField(encoder=DjangoJSONEncoder)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from django.db import models

from workflow_manager.fields import OrcaBusIdField
from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager
from workflow_manager.models.payload import Payload
from workflow_manager.models.workflow_run import WorkflowRun
Expand Down Expand Up @@ -88,9 +89,8 @@ class State(OrcaBusBaseModel):
class Meta:
unique_together = ["workflow_run", "status", "timestamp"]

orcabus_id_prefix = 'stt.'

# --- mandatory fields
orcabus_id = OrcaBusIdField(primary_key=True, prefix='stt')
status = models.CharField(max_length=255) # TODO: How and where to enforce conventions?
timestamp = models.DateTimeField()
comment = models.CharField(max_length=255, null=True, blank=True)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models

from workflow_manager.fields import OrcaBusIdField
from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager


Expand All @@ -12,8 +13,7 @@ class Meta:
# a combo of this gives us human-readable pipeline id
unique_together = ["workflow_name", "workflow_version"]

orcabus_id_prefix = 'wfl.'

orcabus_id = OrcaBusIdField(primary_key=True, prefix='wfl')
workflow_name = models.CharField(max_length=255)
workflow_version = models.CharField(max_length=255)
execution_engine = models.CharField(max_length=255)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models

from workflow_manager.fields import OrcaBusIdField
from workflow_manager.models.analysis_run import AnalysisRun
from workflow_manager.models.base import OrcaBusBaseModel, OrcaBusBaseManager
from workflow_manager.models.library import Library
Expand All @@ -11,8 +12,7 @@ class WorkflowRunManager(OrcaBusBaseManager):


class WorkflowRun(OrcaBusBaseModel):
orcabus_id_prefix = 'wfr.'

orcabus_id = OrcaBusIdField(primary_key=True, prefix='wfr')
portal_run_id = models.CharField(max_length=255, unique=True)

execution_id = models.CharField(max_length=255, null=True, blank=True)
Expand All @@ -38,6 +38,7 @@ def get_latest_state(self):
# retrieve all related states and get the latest one
return self.states.order_by('-timestamp').first()


class LibraryAssociationManager(OrcaBusBaseManager):
pass

Expand Down
Loading

0 comments on commit eafc802

Please sign in to comment.