def process_exception(self, request, exception):
    """Log the exception and render the error page that matches its type.

    Each entry of the table below pairs an exception class with the template
    to render, the heading passed to the template as ``h1_value`` and the
    HTTP status of the response. The first entry whose class matches
    ``exception`` wins; ``None`` is returned when no entry matches.
    """
    logger.exception(exception)

    # Order matters: specific classes first, the generic fallback last.
    error_pages = (
        (
            ConnectivityError,
            "500_datahub_unavailable.html",
            "Catalogue service unavailable",
            500,
        ),
        (BadRequest, "400.html", "Bad request", 400),
        (Http404, "404.html", "Page not found", 404),
        (Exception, "500.html", "Server error", 500),
    )

    for error_class, template_name, heading, status_code in error_pages:
        if isinstance(exception, error_class):
            return render(
                request,
                template_name,
                context={"h1_value": heading},
                status=status_code,
            )
    return None
forms.BooleanField(initial=False, required=False) + tags = forms.MultipleChoiceField(choices=get_tags, required=False) + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.initial["sort"] = "relevance" diff --git a/home/service/search.py b/home/service/search.py index f78dc52c..ef197517 100644 --- a/home/service/search.py +++ b/home/service/search.py @@ -80,6 +80,7 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse: sort = form_data.get("sort", "relevance") domain = form_data.get("domain", "") subdomain = form_data.get("subdomain", "") + tags = form_data.get("tags", "") domains_and_subdomains = domains_with_their_subdomains( domain, subdomain, self.domain_model ) @@ -92,6 +93,10 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse: filter_value.append(MultiSelectFilter("domains", domains_and_subdomains)) if where_to_access: filter_value.append(MultiSelectFilter("customProperties", where_to_access)) + if tags: + filter_value.append( + MultiSelectFilter("tags", [f"urn:li:tag:{tag}" for tag in tags]) + ) page_for_search = str(int(page) - 1) if sort == "ascending": @@ -122,6 +127,7 @@ def _generate_remove_filter_hrefs(self) -> dict[str, dict[str, str]] | None: domain = self.form.cleaned_data.get("domain", "") entity_types = self.form.cleaned_data.get("entity_types", []) where_to_access = self.form.cleaned_data.get("where_to_access", []) + tags = self.form.cleaned_data.get("tags", []) remove_filter_hrefs = {} if domain: remove_filter_hrefs["domain"] = self._generate_domain_clear_href() @@ -144,6 +150,14 @@ def _generate_remove_filter_hrefs(self) -> dict[str, dict[str, str]] | None: ) ) remove_filter_hrefs["Where To Access"] = where_to_access_clear_href + + if tags: + tags_clear_href = {} + for tag in tags: + tags_clear_href[tag] = self.form.encode_without_filter( + filter_name="tags", filter_value=tag + ) + remove_filter_hrefs["Tags"] = tags_clear_href else: remove_filter_hrefs = None 
class SearchTagFetcher(GenericService):
    """Fetch the tags offered as a search filter, caching the result."""

    def __init__(self):
        self.client = self._get_catalogue_client()
        self.cache_key = "search_tags"
        self.cache_timeout_seconds = 300

    def fetch(self) -> list:
        """
        Fetch a static list of options that is independent of the search query
        and any applied filters.

        Values are cached for ``cache_timeout_seconds`` to avoid unnecessary
        queries.
        """
        result = cache.get(self.cache_key)

        # Compare against None (the cache's "missing key" value) rather than
        # truthiness, so an empty tag list is cached like any other result
        # instead of triggering a catalogue query on every call.
        if result is None:
            result = self.client.get_tags()
            # Write only on a miss: re-setting the key on every hit would keep
            # pushing the expiry forward and the cached tags would never be
            # refreshed while the page is in regular use.
            cache.set(self.cache_key, result, timeout=self.cache_timeout_seconds)

        return result
def get_tags(self, count: int = 2000):
    """Return the search client's tags, requesting at most ``count`` of them."""
    tags = self.search_client.get_tags(count)
    return tags
def get_tags(self, count: int = 2000) -> list[tuple[str, str]]:
    """
    Get a list of tags from DataHub.

    Returns one ``(tag name, tag urn)`` tuple per tag returned by the query,
    where the name is the urn without its ``urn:li:tag:`` prefix.

    At most ``count`` tags are requested. If DataHub holds more tags than
    that, the returned list is incomplete and ``count`` should be increased;
    a warning is logged when this happens.

    Raises CatalogueError if the getTags query cannot be executed.
    """
    variables = {"count": count}
    try:
        response = self.graph.execute_graphql(self.get_tags_query, variables)
    except GraphError as e:
        raise CatalogueError("Unable to execute getTags query") from e

    response = response["searchAcrossEntities"]
    logger.debug(json.dumps(response, indent=2))

    # Make truncation visible instead of silently offering a partial list.
    total = response.get("total") or 0
    if total > count:
        logger.warning(
            "getTags returned %s of %s tags; increase count to fetch all tags",
            count,
            total,
        )

    return self._parse_global_tags(response)


def _parse_global_tags(self, tag_query_results) -> list[tuple[str, str]]:
    """Parse the results of the get tags query into (name, urn) tuples."""

    # name properties of tags are often not set, i.e. all those from dbt
    # so better to get tag name from tag urn.
    # Only the leading "urn:li:tag:" is stripped, so that prefixing the name
    # again always reproduces the original urn.
    urns = (tag["entity"]["urn"] for tag in tag_query_results["searchResults"])
    return [(urn.removeprefix("urn:li:tag:"), urn) for urn in urns]
def __post_init__(self):
    """Fill ``tags_to_display`` with the display names of the visible tags.

    Tags whose display name starts with ``dc_`` are left out; the order of
    ``self.tags`` is kept.
    """
    visible_names = []
    for tag in self.tags:
        if tag.display_name.startswith("dc_"):
            continue
        visible_names.append(tag.display_name)
    self.tags_to_display = visible_names
assert response == [ + ("tag1", "urn:li:tag:tag1"), + ("tag2", "urn:li:tag:tag2"), + ("tag3", "urn:li:tag:tag3"), + ] diff --git a/scss/base.scss b/scss/base.scss index 404ad5f0..6e51fb2a 100644 --- a/scss/base.scss +++ b/scss/base.scss @@ -1,6 +1,6 @@ $govuk-assets-path: "/static/assets/"; $moj-assets-path: "/static/assets/"; -$fmj-images-path: "/static/assets/images"; +$app-images-path: "/static/assets/images"; // Removes need to put classes on all elements $govuk-global-styles: true; diff --git a/scss/components/README.md b/scss/components/README.md index e551e792..56b83381 100644 --- a/scss/components/README.md +++ b/scss/components/README.md @@ -26,7 +26,7 @@ This variant should be used only for search-as-you-type behaviour that does not See `enhanced-glossary.js`. ```html -