Skip to content

Commit

Permalink
Working fixture to add link models
Browse files Browse the repository at this point in the history
  • Loading branch information
Will Langdale committed Oct 21, 2024
1 parent 9dd78b2 commit 9a1f970
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 13 deletions.
13 changes: 13 additions & 0 deletions src/matchbox/common/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,19 @@ class MatchboxValidatonError(Exception):
"""Validation of data failed."""


class MatchboxModelError(Exception):
"""Model not found."""

def __init__(self, message: str = None, model_name: str = None):
if message is None:
message = "Model not found."
if model_name is not None:
message = f"Model {model_name} not found."

super().__init__(message)
self.model_name = model_name


class MatchboxDBDataError(Exception):
"""Data doesn't exist in the Matchbox source table."""

Expand Down
8 changes: 4 additions & 4 deletions src/matchbox/server/postgresql/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from sqlalchemy.engine.result import ChunkedIteratorResult
from sqlalchemy.orm import Session

from matchbox.common.exceptions import MatchboxDBDataError
from matchbox.common.exceptions import MatchboxDBDataError, MatchboxModelError
from matchbox.server.base import MatchboxDBAdapter, MatchboxModelAdapter
from matchbox.server.models import Cluster, Probability, Source, SourceWarehouse
from matchbox.server.postgresql.clusters import Clusters, clusters_association
Expand Down Expand Up @@ -106,10 +106,10 @@ def insert_clusters(
@classmethod
def get_model(cls, model_name: str) -> "MatchboxPostgresModel":
with Session(MBDB.get_engine()) as session:
model = session.query(Models).filter_by(name=model_name).first()
if model:
if model := session.query(Models).filter_by(name=model_name).first():
return cls(model)
return None
else:
raise MatchboxModelError(model_name=model_name)


class MatchboxPostgres(MatchboxDBAdapter):
Expand Down
9 changes: 6 additions & 3 deletions src/matchbox/server/postgresql/utils/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from sqlalchemy.sql.selectable import Select

from matchbox.common.db import sql_to_df
from matchbox.common.exceptions import MatchboxModelError
from matchbox.server.models import Source
from matchbox.server.postgresql.clusters import Clusters, clusters_association
from matchbox.server.postgresql.data import SourceData, SourceDataset
Expand Down Expand Up @@ -105,9 +106,11 @@ def _parent_to_tree(model_name: str, engine: Engine) -> tuple[bytes, list[bytes]
"""

with Session(engine) as session:
model = session.query(Models).filter_by(name=model_name).first()
model_children = get_all_children(model)
model_children.pop(0) # includes original model
if model := session.query(Models).filter_by(name=model_name).first():
model_children = get_all_children(model)
model_children.pop(0) # includes original model
else:
raise MatchboxModelError(model_name=model_name)

return model.sha1, [m.sha1 for m in model_children]

Expand Down
6 changes: 3 additions & 3 deletions test/fixtures/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def query_clean_crn_deduped(
crn = query(
selector=select_crn,
backend=matchbox_postgres,
model="naive_mb.crn",
model="naive_test.crn",
return_type="pandas",
)

Expand Down Expand Up @@ -239,7 +239,7 @@ def query_clean_duns_deduped(
duns = query(
selector=select_duns,
backend=matchbox_postgres,
model="naive_mb.duns",
model="naive_test.duns",
return_type="pandas",
)

Expand Down Expand Up @@ -269,7 +269,7 @@ def query_clean_cdms_deduped(
cdms = query(
selector=select_cdms,
backend=matchbox_postgres,
model="naive_mb.cdms",
model="naive_test.cdms",
return_type="pandas",
)

Expand Down
1 change: 1 addition & 0 deletions test/fixtures/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def _db_add_link_models_and_data(
) -> None:
"""Links data from the warehouse and logs in Matchbox."""
db_add_dedupe_models_and_data(
db_add_indexed_data=db_add_indexed_data,
backend=backend,
warehouse_data=warehouse_data,
dedupe_data=dedupe_data,
Expand Down
29 changes: 26 additions & 3 deletions test/server/test_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@
from pandas import DataFrame
from pytest import FixtureRequest

from ..fixtures.db import AddDedupeModelsAndDataCallable, AddIndexedDataCallable
from ..fixtures.db import (
AddDedupeModelsAndDataCallable,
AddIndexedDataCallable,
AddLinkModelsAndDataCallable,
)
from ..fixtures.models import (
dedupe_data_test_params,
dedupe_model_test_params,
link_data_test_params,
link_model_test_params,
)

dotenv_path = find_dotenv()
Expand Down Expand Up @@ -173,9 +179,26 @@ def test_query_with_dedupe_model(
assert df_crn.cluster_hash.nunique() == 1000


def test_query_with_link_model():
def test_query_with_link_model(
matchbox_postgres: MatchboxPostgres,
db_add_dedupe_models_and_data: AddDedupeModelsAndDataCallable,
db_add_indexed_data: AddIndexedDataCallable,
db_add_link_models_and_data: AddLinkModelsAndDataCallable,
warehouse_data: list[Source],
request: FixtureRequest,
):
"""Test querying data from a link point of truth."""
pass
db_add_link_models_and_data(
db_add_indexed_data=db_add_indexed_data,
db_add_dedupe_models_and_data=db_add_dedupe_models_and_data,
backend=matchbox_postgres,
warehouse_data=warehouse_data,
dedupe_data=dedupe_data_test_params,
dedupe_models=[dedupe_model_test_params[0]], # Naive deduper,
link_data=link_data_test_params,
link_models=[link_model_test_params[0]], # Deterministic linker,
request=request,
)


def test_validate_hashes(matchbox_postgres):
Expand Down

0 comments on commit 9a1f970

Please sign in to comment.