Skip to content

Commit

Permalink
Use tags to populate subject areas (#1233)
Browse files Browse the repository at this point in the history
* feat: add subject area models

* refactor: use helper function

* refactor: use tags to populate subject areas list

* refactor: filter on subject area using tags

* refactor: use tags when displaying subject area

The subject area labels were previously populated from the domain in
Datahub. This now comes from a tag.

Where there are multiple tags, pick the first one for now. In a future
commit, I'll enable multiple subject areas to be displayed.

* refactor: remove unused code for domains

* Update datahub_client/search/search_client.py

Co-authored-by: Murdo <[email protected]>

* Update datahub_client/parsers.py

Co-authored-by: Murdo <[email protected]>

* refactor: extract helper for name parsing

---------

Co-authored-by: Murdo <[email protected]>
  • Loading branch information
MatMoore and murdo-moj authored Jan 15, 2025
1 parent c631f6f commit 9dc0038
Show file tree
Hide file tree
Showing 17 changed files with 308 additions and 210 deletions.
30 changes: 7 additions & 23 deletions datahub_client/client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import logging
from importlib.resources import files
from typing import Sequence

from datahub.configuration.common import ConfigurationError
Expand All @@ -26,6 +25,7 @@
TableEntityMapping,
)
from datahub_client.exceptions import ConnectivityError, EntityDoesNotExist
from datahub_client.graphql.loader import get_graphql_query
from datahub_client.parsers import (
ChartParser,
DashboardParser,
Expand Down Expand Up @@ -104,26 +104,10 @@ def __init__(self, jwt_token, api_url: str, graph=None):

self.search_client = SearchClient(self.graph)

self.database_query = (
files("datahub_client.graphql")
.joinpath("getContainerDetails.graphql")
.read_text()
)
self.dataset_query = (
files("datahub_client.graphql")
.joinpath("getDatasetDetails.graphql")
.read_text()
)
self.chart_query = (
files("datahub_client.graphql")
.joinpath("getChartDetails.graphql")
.read_text()
)
self.dashboard_query = (
files("datahub_client.graphql")
.joinpath("getDashboardDetails.graphql")
.read_text()
)
self.database_query = get_graphql_query("getContainerDetails")
self.dataset_query = get_graphql_query("getDatasetDetails")
self.chart_query = get_graphql_query("getChartDetails")
self.dashboard_query = get_graphql_query("getDashboardDetails")

def check_entity_exists_by_urn(self, urn: str | None):
if urn is not None:
Expand Down Expand Up @@ -190,7 +174,7 @@ def search(
sort=sort,
)

def list_domains(
def list_subject_areas(
self,
query: str = "*",
filters: Sequence[MultiSelectFilter] | None = None,
Expand All @@ -199,7 +183,7 @@ def list_domains(
"""
Returns a list of DomainOption objects
"""
return self.search_client.list_domains(
return self.search_client.list_subject_areas(
query=query, filters=filters, count=count
)

Expand Down
37 changes: 37 additions & 0 deletions datahub_client/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ class TagRef(BaseModel):
Reference to a tag
"""

@classmethod
def from_name(cls, name):
    """Build an instance from a bare tag name.

    The name is used as the display name and is also embedded in the
    generated ``urn:li:tag:<name>`` URN.
    """
    tag_urn = f"urn:li:tag:{name}"
    return cls(urn=tag_urn, display_name=name)

display_name: str = Field(
description="Human friendly tag name",
examples=["PII"],
Expand Down Expand Up @@ -692,3 +696,36 @@ class Dashboard(Entity):
description="URL to view the dashboard",
examples=["https://data.justice.gov.uk"],
)


class SubjectArea(TagRef):
    """A tag reference that is used as a subject area."""

    @property
    def domain_urn(self):
        """Return this subject area's URN with ``:tag:`` swapped for ``:domain:``.

        Only the first ``:tag:`` occurrence (the entity-type segment of a
        ``urn:li:tag:<name>`` URN) is replaced, so a tag name that happens to
        contain ``:tag:`` itself is left intact.
        """
        return self.urn.replace(":tag:", ":domain:", 1)


class SubjectAreaTaxonomy:
    """Fixed list of top-level subject areas with lookup helpers by name."""

    # Built in the order the names are listed below.
    TOP_LEVEL = [
        SubjectArea.from_name(label)
        for label in (
            "Bold",
            "Civil",
            "Courts",
            "Electronic monitoring",
            "Finance",
            "General",
            "Interventions",
            "OPG",
            "People",
            "Prison",
            "Probation",
            "Property",
            "Risk",
        )
    ]

    @classmethod
    def get_top_level(cls, name):
        """Return the first top-level subject area whose display name equals
        ``name``, or ``None`` when there is no match."""
        return next(
            (area for area in cls.TOP_LEVEL if area.display_name == name),
            None,
        )

    @classmethod
    def is_subject_area(cls, name):
        """Return ``True`` when ``name`` is the display name of a top-level
        subject area."""
        found = cls.get_top_level(name)
        return found is not None
25 changes: 16 additions & 9 deletions datahub_client/graphql/getChartDetails.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,7 @@ query getChartDetails($urn: String!) {
}
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
domain {
domain {
Expand Down Expand Up @@ -80,6 +72,21 @@ query getChartDetails($urn: String!) {
}
}

# Selection used wherever the chart query spreads ...globalTagsFields:
# for each tag association it fetches the tag's urn, type and name, the tag
# properties (name, colorHex), and the association's associatedUrn.
fragment globalTagsFields on GlobalTags {
tags {
tag {
urn
type
name
properties {
name
colorHex
}
}
associatedUrn
}
}

fragment ownershipFields on Ownership {
owners {
owner {
Expand Down
1 change: 1 addition & 0 deletions datahub_client/graphql/getContainerDetails.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ fragment globalTagsFields on GlobalTags {
tag {
urn
type
name
properties {
name
colorHex
Expand Down
18 changes: 18 additions & 0 deletions datahub_client/graphql/getDashboardDetails.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ query getDashboard($urn: String!) {
ownership {
...ownershipFields
}
tags {
...globalTagsFields
}
properties {
name
description
Expand Down Expand Up @@ -55,6 +58,21 @@ query getDashboard($urn: String!) {
}
}

# Selection used wherever the dashboard query spreads ...globalTagsFields:
# for each tag association it fetches the tag's urn, type and name, the tag
# properties (name, colorHex), and the association's associatedUrn.
fragment globalTagsFields on GlobalTags {
tags {
tag {
urn
type
name
properties {
name
colorHex
}
}
associatedUrn
}
}

fragment ownershipFields on Ownership {
owners {
owner {
Expand Down
25 changes: 16 additions & 9 deletions datahub_client/graphql/getDatasetDetails.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,7 @@ query getDatasetDetails($urn: String!) {
description
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
lastIngested
domain {
Expand Down Expand Up @@ -173,6 +165,21 @@ query getDatasetDetails($urn: String!) {
}
}

# Selection used wherever the dataset query spreads ...globalTagsFields:
# for each tag association it fetches the tag's urn, type and name, the tag
# properties (name, colorHex), and the association's associatedUrn.
fragment globalTagsFields on GlobalTags {
tags {
tag {
urn
type
name
properties {
name
colorHex
}
}
associatedUrn
}
}

fragment ownershipFields on Ownership {
owners {
owner {
Expand Down
26 changes: 26 additions & 0 deletions datahub_client/graphql/listSubjectAreas.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Aggregates the "tags" facet across entities matching $query, restricted to
# $types and to entities satisfying every filter in $filters (passed as a
# single AND group inside orFilters).
# Each aggregation returns the facet value, its count, and the entity urn;
# when the entity is a Tag, its name and properties.name are returned too.
# NOTE(review): maxAggValues is fixed at 100 - presumably this caps how many
# tag values come back per facet; confirm against the DataHub API docs.
query listSubjectAreas(
$filters:[FacetFilterInput!]
$query: String!
$types: [EntityType!]
) {
aggregateAcrossEntities(
input: {searchFlags: {maxAggValues:100}, query: $query, types: $types, facets: ["tags"], orFilters: [{and: $filters}]}
) {
facets {
field
aggregations {
value
count
entity {
urn
... on Tag {
name
properties {
name
}
}
}
}
}
}
}
23 changes: 23 additions & 0 deletions datahub_client/graphql/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import logging
from importlib.resources import files

from ..exceptions import CatalogueError

GRAPHQL_FILES_PATH = "datahub_client.graphql"
GRAPHQL_FILE_EXTENSION = ".graphql"

logger = logging.getLogger(__name__)


def get_graphql_query(graphql_query_file_name: str) -> str:
    """Load the text of a GraphQL query bundled with the package.

    Args:
        graphql_query_file_name: Base name of the query file, without the
            extension; ``GRAPHQL_FILE_EXTENSION`` is appended before lookup
            inside the ``GRAPHQL_FILES_PATH`` package.

    Returns:
        The full contents of the query file.

    Raises:
        CatalogueError: If the query file does not exist or is empty.
    """
    resource = files(GRAPHQL_FILES_PATH).joinpath(
        f"{graphql_query_file_name}{GRAPHQL_FILE_EXTENSION}"
    )
    try:
        query_text = resource.read_text()
    except FileNotFoundError as err:
        # read_text() raises for a missing file instead of returning "", so
        # the emptiness check below alone never reported a missing query.
        logger.error("No graphql query file found for %s", graphql_query_file_name)
        raise CatalogueError(
            f"No graphql query file found for {graphql_query_file_name}"
        ) from err

    if not query_text:
        logger.error("No graphql query file found for %s", graphql_query_file_name)
        raise CatalogueError(
            f"No graphql query file found for {graphql_query_file_name}"
        )
    return query_text
55 changes: 19 additions & 36 deletions datahub_client/graphql/search.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,7 @@ query Search(
...ownershipFields
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
glossaryTerms {
terms {
Expand Down Expand Up @@ -134,15 +126,7 @@ query Search(
}
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
glossaryTerms {
terms {
Expand Down Expand Up @@ -194,15 +178,7 @@ query Search(
description
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
glossaryTerms {
terms {
Expand Down Expand Up @@ -254,15 +230,7 @@ query Search(
}
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
glossaryTerms {
terms {
Expand All @@ -281,6 +249,21 @@ query Search(
}
}

# Selection used wherever the Search query spreads ...globalTagsFields:
# for each tag association it fetches the tag's urn, type and name, the tag
# properties (name, colorHex), and the association's associatedUrn.
fragment globalTagsFields on GlobalTags {
tags {
tag {
urn
type
name
properties {
name
colorHex
}
}
associatedUrn
}
}

fragment ownershipFields on Ownership {
owners {
owner {
Expand Down
Loading

0 comments on commit 9dc0038

Please sign in to comment.