Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use tags to populate subject areas #1233

Merged
merged 9 commits into from
Jan 15, 2025
Merged
30 changes: 7 additions & 23 deletions datahub_client/client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import logging
from importlib.resources import files
from typing import Sequence

from datahub.configuration.common import ConfigurationError
Expand All @@ -26,6 +25,7 @@
TableEntityMapping,
)
from datahub_client.exceptions import ConnectivityError, EntityDoesNotExist
from datahub_client.graphql.loader import get_graphql_query
from datahub_client.parsers import (
ChartParser,
DashboardParser,
Expand Down Expand Up @@ -104,26 +104,10 @@ def __init__(self, jwt_token, api_url: str, graph=None):

self.search_client = SearchClient(self.graph)

self.database_query = (
files("datahub_client.graphql")
.joinpath("getContainerDetails.graphql")
.read_text()
)
self.dataset_query = (
files("datahub_client.graphql")
.joinpath("getDatasetDetails.graphql")
.read_text()
)
self.chart_query = (
files("datahub_client.graphql")
.joinpath("getChartDetails.graphql")
.read_text()
)
self.dashboard_query = (
files("datahub_client.graphql")
.joinpath("getDashboardDetails.graphql")
.read_text()
)
self.database_query = get_graphql_query("getContainerDetails")
self.dataset_query = get_graphql_query("getDatasetDetails")
self.chart_query = get_graphql_query("getChartDetails")
self.dashboard_query = get_graphql_query("getDashboardDetails")

def check_entity_exists_by_urn(self, urn: str | None):
if urn is not None:
Expand Down Expand Up @@ -190,7 +174,7 @@ def search(
sort=sort,
)

def list_domains(
def list_subject_areas(
self,
query: str = "*",
filters: Sequence[MultiSelectFilter] | None = None,
Expand All @@ -199,7 +183,7 @@ def list_domains(
"""
Returns a list of DomainOption objects
"""
return self.search_client.list_domains(
return self.search_client.list_subject_areas(
query=query, filters=filters, count=count
)

Expand Down
37 changes: 37 additions & 0 deletions datahub_client/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ class TagRef(BaseModel):
Reference to a tag
"""

@classmethod
def from_name(cls, name):
    """
    Alternate constructor: build an instance from a tag name alone.

    The name is used unchanged as the display name and is appended to the
    ``urn:li:tag:`` prefix to form the URN.
    """
    tag_urn = f"urn:li:tag:{name}"
    return cls(display_name=name, urn=tag_urn)

display_name: str = Field(
description="Human friendly tag name",
examples=["PII"],
Expand Down Expand Up @@ -692,3 +696,36 @@ class Dashboard(Entity):
description="URL to view the dashboard",
examples=["https://data.justice.gov.uk"],
)


class SubjectArea(TagRef):
    """
    A tag reference that can also be addressed as a domain URN.
    """

    @property
    def domain_urn(self) -> str:
        """
        Return this tag's URN with its ``:tag:`` segment swapped for ``:domain:``.
        """
        # Only the first ":tag:" is the URN's entity-type segment (URNs built by
        # from_name look like "urn:li:tag:<name>"); limiting the replacement to
        # one occurrence keeps a name that itself contains ":tag:" intact.
        return self.urn.replace(":tag:", ":domain:", 1)


class SubjectAreaTaxonomy:
    """
    Fixed list of top-level subject areas, with lookups by display name.
    """

    # One SubjectArea per name, in the order listed here.
    TOP_LEVEL = [
        SubjectArea.from_name(label)
        for label in (
            "Bold",
            "Civil",
            "Courts",
            "Electronic monitoring",
            "Finance",
            "General",
            "Interventions",
            "OPG",
            "People",
            "Prison",
            "Probation",
            "Property",
            "Risk",
        )
    ]

    @classmethod
    def get_top_level(cls, name):
        """
        Return the first top-level subject area whose display name equals
        ``name``, or None when there is no such entry.
        """
        return next(
            (area for area in cls.TOP_LEVEL if area.display_name == name),
            None,
        )

    @classmethod
    def is_subject_area(cls, name):
        """
        Report whether ``name`` is the display name of a top-level subject area.
        """
        found = cls.get_top_level(name)
        return found is not None
25 changes: 16 additions & 9 deletions datahub_client/graphql/getChartDetails.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,7 @@ query getChartDetails($urn: String!) {
}
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
domain {
domain {
Expand Down Expand Up @@ -80,6 +72,21 @@ query getChartDetails($urn: String!) {
}
}

# Reusable selection for a GlobalTags value: for every tag association it
# fetches the tag's urn, type, name and properties (name, colorHex), plus the
# association's associatedUrn (presumably the URN of the tagged entity -
# confirm against the DataHub schema).
fragment globalTagsFields on GlobalTags {
tags {
tag {
urn
type
name
properties {
name
colorHex
}
}
associatedUrn
}
}

fragment ownershipFields on Ownership {
owners {
owner {
Expand Down
1 change: 1 addition & 0 deletions datahub_client/graphql/getContainerDetails.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ fragment globalTagsFields on GlobalTags {
tag {
urn
type
name
properties {
name
colorHex
Expand Down
18 changes: 18 additions & 0 deletions datahub_client/graphql/getDashboardDetails.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ query getDashboard($urn: String!) {
ownership {
...ownershipFields
}
tags {
...globalTagsFields
}
properties {
name
description
Expand Down Expand Up @@ -55,6 +58,21 @@ query getDashboard($urn: String!) {
}
}

# Reusable selection for a GlobalTags value: for every tag association it
# fetches the tag's urn, type, name and properties (name, colorHex), plus the
# association's associatedUrn (presumably the URN of the tagged entity -
# confirm against the DataHub schema).
fragment globalTagsFields on GlobalTags {
tags {
tag {
urn
type
name
properties {
name
colorHex
}
}
associatedUrn
}
}

fragment ownershipFields on Ownership {
owners {
owner {
Expand Down
25 changes: 16 additions & 9 deletions datahub_client/graphql/getDatasetDetails.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,7 @@ query getDatasetDetails($urn: String!) {
description
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
lastIngested
domain {
Expand Down Expand Up @@ -173,6 +165,21 @@ query getDatasetDetails($urn: String!) {
}
}

# Reusable selection for a GlobalTags value: for every tag association it
# fetches the tag's urn, type, name and properties (name, colorHex), plus the
# association's associatedUrn (presumably the URN of the tagged entity -
# confirm against the DataHub schema).
fragment globalTagsFields on GlobalTags {
tags {
tag {
urn
type
name
properties {
name
colorHex
}
}
associatedUrn
}
}

fragment ownershipFields on Ownership {
owners {
owner {
Expand Down
26 changes: 26 additions & 0 deletions datahub_client/graphql/listSubjectAreas.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Aggregates matching entities by the "tags" facet.
#
# Variables:
#   $filters - optional facet filters; all of them are placed in a single
#              "and" group inside orFilters, so every filter must match.
#   $query   - required search string.
#   $types   - optional list of entity types to aggregate over.
#
# For each facet the response carries its field and, per aggregation, the
# facet value, its count, and the referenced entity's urn; when that entity is
# a Tag, its name and properties.name are returned as well.
# NOTE(review): maxAggValues:100 presumably caps the number of aggregation
# values returned per facet - confirm against the DataHub schema.
query listSubjectAreas(
$filters:[FacetFilterInput!]
$query: String!
$types: [EntityType!]
) {
aggregateAcrossEntities(
input: {searchFlags: {maxAggValues:100}, query: $query, types: $types, facets: ["tags"], orFilters: [{and: $filters}]}
) {
facets {
field
aggregations {
value
count
entity {
urn
... on Tag {
name
properties {
name
}
}
}
}
}
}
}
23 changes: 23 additions & 0 deletions datahub_client/graphql/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import logging
from importlib.resources import files

from ..exceptions import CatalogueError

GRAPHQL_FILES_PATH = "datahub_client.graphql"
GRAPHQL_FILE_EXTENSION = ".graphql"

logger = logging.getLogger(__name__)


def get_graphql_query(graphql_query_file_name: str) -> str:
    """
    Load the text of a packaged GraphQL query.

    The file is looked up inside the ``GRAPHQL_FILES_PATH`` package under the
    name ``<graphql_query_file_name><GRAPHQL_FILE_EXTENSION>``.

    Args:
        graphql_query_file_name: query file name without its extension,
            e.g. ``"getDatasetDetails"``.

    Returns:
        The full contents of the query file.

    Raises:
        CatalogueError: if the query file does not exist or is empty.
    """
    query_file = files(GRAPHQL_FILES_PATH).joinpath(
        f"{graphql_query_file_name}{GRAPHQL_FILE_EXTENSION}"
    )
    try:
        # Pin the encoding so the result does not depend on the host locale.
        query_text = query_file.read_text(encoding="utf-8")
    except FileNotFoundError as err:
        # read_text() raises for a missing file rather than returning "", so
        # translate that into the package's own error type for callers.
        logger.error("No graphql query file found for %s", graphql_query_file_name)
        raise CatalogueError(
            f"No graphql query file found for {graphql_query_file_name}"
        ) from err
    if not query_text:
        # An empty file is as unusable as a missing one.
        logger.error("No graphql query file found for %s", graphql_query_file_name)
        raise CatalogueError(
            f"No graphql query file found for {graphql_query_file_name}"
        )
    return query_text
55 changes: 19 additions & 36 deletions datahub_client/graphql/search.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,7 @@ query Search(
...ownershipFields
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
glossaryTerms {
terms {
Expand Down Expand Up @@ -134,15 +126,7 @@ query Search(
}
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
glossaryTerms {
terms {
Expand Down Expand Up @@ -194,15 +178,7 @@ query Search(
description
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
glossaryTerms {
terms {
Expand Down Expand Up @@ -254,15 +230,7 @@ query Search(
}
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
...globalTagsFields
}
glossaryTerms {
terms {
Expand All @@ -281,6 +249,21 @@ query Search(
}
}

# Reusable selection for a GlobalTags value: for every tag association it
# fetches the tag's urn, type, name and properties (name, colorHex), plus the
# association's associatedUrn (presumably the URN of the tagged entity -
# confirm against the DataHub schema).
fragment globalTagsFields on GlobalTags {
tags {
tag {
urn
type
name
properties {
name
colorHex
}
}
associatedUrn
}
}

fragment ownershipFields on Ownership {
owners {
owner {
Expand Down
Loading
Loading