Skip to content

Commit

Permalink
Add timestamp validation and associated test
Browse files Browse the repository at this point in the history
  • Loading branch information
mitchdawson1982 committed Jan 14, 2025
1 parent 6acd450 commit 5eb54f3
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 13 deletions.
29 changes: 16 additions & 13 deletions datahub_client/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
from enum import Enum
from typing import Literal, Optional

from pydantic import BaseModel, EmailStr, Field
from pydantic import AfterValidator, BaseModel, EmailStr, Field
from typing_extensions import Annotated

from .validators import check_timestamp_is_in_the_past

DATAHUB_DATE_FORMAT = "%Y%m%d"

Expand Down Expand Up @@ -542,17 +545,23 @@ class Entity(BaseModel):
]
],
)
metadata_last_ingested: Optional[datetime] = Field(
metadata_last_ingested: Annotated[
Optional[datetime], AfterValidator(check_timestamp_is_in_the_past)
] = Field(
description="When the metadata was last updated in the catalogue",
default=None,
examples=[datetime(2011, 10, 2, 3, 0, 0)],
)
created: Optional[datetime] = Field(
created: Annotated[
Optional[datetime], AfterValidator(check_timestamp_is_in_the_past)
] = Field(
description="When the data entity was first created",
default=None,
examples=[datetime(2011, 10, 2, 3, 0, 0)],
)
data_last_modified: Optional[datetime] = Field(
data_last_modified: Annotated[
Optional[datetime], AfterValidator(check_timestamp_is_in_the_past)
] = Field(
description="When the data entity was last modified in the source system",
default=None,
examples=[datetime(2011, 10, 2, 3, 0, 0)],
Expand Down Expand Up @@ -652,7 +661,9 @@ class Table(Entity):
]
],
)
last_datajob_run_date: Optional[datetime] = Field(
last_datajob_run_date: Annotated[
Optional[datetime], AfterValidator(check_timestamp_is_in_the_past)
] = Field(
description="Indicates the time when the data were last refreshed (eg pipeline run with dbt).",
default=None,
examples=[datetime(2011, 10, 2, 3, 0, 0)],
Expand Down Expand Up @@ -681,11 +692,3 @@ class Dashboard(Entity):
description="URL to view the dashboard",
examples=["https://data.justice.gov.uk"],
)


# if __name__ == "__main__":
# import erdantic as erd

# erd.draw(Database, out="database.png")
# erd.draw(Table, out="table.png")
# erd.draw(Chart, out="chart.png")
7 changes: 7 additions & 0 deletions datahub_client/validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from datetime import datetime


def check_timestamp_is_in_the_past(datetime: datetime) -> datetime:
    """Validate that a timestamp lies strictly before the current time.

    Args:
        datetime: The value to check. ``None`` is accepted and passed
            through untouched so the validator can be attached to optional
            fields.

    Returns:
        The value that was passed in, unchanged.

    Raises:
        ValueError: If the value's POSIX timestamp is greater than or equal
            to the current time (a timestamp equal to "now" is rejected).
    """
    # The parameter name shadows the imported ``datetime`` class inside this
    # function, so the current time is read from ``time.time()`` rather than
    # by calling ``.now()`` on the value being validated.
    import time

    if datetime is None:
        return datetime

    # ``timestamp()`` converts both naive and timezone-aware values to POSIX
    # seconds, which makes them directly comparable with ``time.time()``.
    if datetime.timestamp() >= time.time():
        raise ValueError("timestamp must be in the past")
    return datetime
99 changes: 99 additions & 0 deletions tests/datahub_client/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,26 @@
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict

import pytest
from datahub.metadata.schema_classes import DomainPropertiesClass

from datahub_client.entities import (
AccessInformation,
CustomEntityProperties,
DataSummary,
DomainRef,
EntityRef,
EntitySummary,
FurtherInformation,
GlossaryTermRef,
Governance,
OwnerRef,
RelationshipType,
TagRef,
UsageRestrictions,
)

from .test_helpers.graph_helpers import MockDataHubGraph
from .test_helpers.mce_helpers import check_golden_file

Expand Down Expand Up @@ -50,3 +67,85 @@ def pytest_addoption(parser):
default=False,
)
parser.addoption("--copy-output-files", action="store_true", default=False)


@pytest.fixture
def entity_data_with_timestamps_in_future():
    """Return entity keyword arguments whose timestamp fields are one day ahead.

    ``metadata_last_ingested``, ``created`` and ``data_last_modified`` all share
    the same value: the current time plus one day.
    """
    tomorrow = datetime.now() + timedelta(days=1)

    parent_summary = EntitySummary(
        entity_ref=EntityRef(urn="urn:li:database:example", display_name="example"),
        description="entity for an example",
        entity_type="DATABASE",
        tags=[
            TagRef(
                urn="urn:li:tag:dc_display_in_catalogue",
                display_name="dc_display_in_catalogue",
            )
        ],
    )

    owner = OwnerRef(
        display_name="John Doe",
        email="[email protected]",
        urn="urn:li:corpuser:john.doe",
    )
    steward = OwnerRef(
        display_name="Jane Smith",
        email="[email protected]",
        urn="urn:li:corpuser:jane.smith",
    )
    custodian = OwnerRef(
        display_name="Rosanne Columns",
        email="[email protected]",
        urn="urn:li:corpuser:rosanne.columns",
    )
    governance = Governance(
        data_owner=owner,
        data_stewards=[steward],
        data_custodians=[custodian],
    )

    esda_term = GlossaryTermRef(
        display_name="Essential Shared Data Asset (ESDA)",
        urn="urn:li:glossaryTerm:ESDA",
        description="An ESDA is...",
    )

    custom_properties = CustomEntityProperties(
        usage_restrictions=UsageRestrictions(
            dpia_required=False, dpia_location="OneTrust"
        ),
        access_information=AccessInformation(
            dc_where_to_access_dataset="Analytical platform",
            source_dataset_name="stg_xhibit_bw_history",
            s3_location="s3://alpha-hmpps-reports-data",
            dc_access_requirements="Access granted on request",
        ),
        data_summary=DataSummary(row_count=123, refresh_period="Daily"),
        further_information=FurtherInformation(
            dc_slack_channel_name="#data-engineering",
            dc_slack_channel_url="https://hmpps-data-engineering.slack.com",
            dc_teams_channel_name="Data team",
            dc_teams_channel_url="https://teams.microsoft.com/l/channel/123",
            dc_team_email="[email protected]",
        ),
    )

    return {
        "urn": "urn:li:chart:(justice-data,absconds)",
        "display_name": "Absconds",
        "name": "Absconds",
        "fully_qualified_name": "",
        "description": "Number of absconds",
        "relationships": {RelationshipType.PARENT: [parent_summary]},
        "domain": DomainRef(display_name="HMPPS", urn="urn:li:domain:HMCTS"),
        "governance": governance,
        "glossary_terms": [esda_term],
        "metadata_last_ingested": tomorrow,
        "created": tomorrow,
        "data_last_modified": tomorrow,
        "platform": EntityRef(urn="urn:li:dataPlatform:kafka", display_name="Kafka"),
        "custom_properties": custom_properties,
        "tags_to_display": ["nomis", "data-warehouse"],
    }
10 changes: 10 additions & 0 deletions tests/datahub_client/test_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import pytest

from datahub_client.entities import Entity


def test_entity_timestamps_in_future_validation(entity_data_with_timestamps_in_future):
    """Constructing an Entity from future-dated timestamps raises ValueError."""
    payload = entity_data_with_timestamps_in_future

    with pytest.raises(ValueError) as raised:
        Entity(**payload)

    error_text = str(raised.value)
    assert "timestamp must be in the past" in error_text

0 comments on commit 5eb54f3

Please sign in to comment.