Skip to content

Commit

Permalink
Merge branch 'main' into fmd-1184-timestamp-validation
Browse files Browse the repository at this point in the history
  • Loading branch information
mitchdawson1982 authored Jan 13, 2025
2 parents 27943c5 + bb4e3fa commit 57fc607
Show file tree
Hide file tree
Showing 17 changed files with 135 additions and 134 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/scan-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,6 @@ jobs:
- name: Upload SARIF
if: always()
id: upload_sarif
uses: github/codeql-action/upload-sarif@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v2.2.7
uses: github/codeql-action/upload-sarif@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # v2.2.7
with:
sarif_file: trivy-results.sarif
22 changes: 12 additions & 10 deletions home/forms/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,24 @@
from urllib.parse import urlencode

from data_platform_catalogue.entities import FindMoJdataEntityType
from data_platform_catalogue.search_types import DomainOption
from data_platform_catalogue.search_types import SubjectAreaOption
from django import forms

from ..models.domain_model import Domain
from ..service.domain_fetcher import DomainFetcher
from ..models.subject_area_taxonomy import SubjectArea
from ..service.search_tag_fetcher import SearchTagFetcher
from ..service.subject_area_fetcher import SubjectAreaFetcher


def get_domain_choices() -> list[Domain]:
"""Make Domains API call to obtain domain choices"""
def get_subject_area_choices() -> list[SubjectArea]:
"""Make Domains API call to obtain subject area choices"""
choices = [
Domain("", "All subject areas"),
SubjectArea("", "All subject areas"),
]
list_domain_options: list[DomainOption] = DomainFetcher().fetch()
domains: list[Domain] = [Domain(d.urn, d.name) for d in list_domain_options]
choices.extend(domains)
subject_area_options: list[SubjectAreaOption] = SubjectAreaFetcher().fetch()
subject_areas: list[SubjectArea] = [
SubjectArea(d.urn, d.name) for d in subject_area_options
]
choices.extend(subject_areas)
return choices


Expand Down Expand Up @@ -64,7 +66,7 @@ class SearchForm(forms.Form):
),
)
domain = forms.ChoiceField(
choices=get_domain_choices,
choices=get_subject_area_choices,
required=False,
widget=forms.Select(
attrs={
Expand Down
25 changes: 0 additions & 25 deletions home/models/domain_model.py

This file was deleted.

27 changes: 27 additions & 0 deletions home/models/subject_area_taxonomy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import logging
from typing import NamedTuple

from data_platform_catalogue.search_types import SubjectAreaOption

logger = logging.getLogger(__name__)


class SubjectArea(NamedTuple):
    """A subject area as an immutable ``(urn, label)`` pair."""

    # Identifier of the subject area.
    urn: str
    # Text shown for the subject area (e.g. as a form choice label).
    label: str


class SubjectAreaTaxonomy:
    """Lookup of display labels for top-level subject areas, keyed by URN.

    Attributes:
        top_level_subject_areas: One ``SubjectArea`` per option passed in,
            in the same order.
        labels: Mapping of subject area URN to its label.
    """

    def __init__(self, subject_areas: list[SubjectAreaOption]):
        """Build the taxonomy from the given subject area options.

        Args:
            subject_areas: Options exposing ``urn`` and ``name`` attributes.
        """
        self.top_level_subject_areas: list[SubjectArea] = [
            SubjectArea(subject_area.urn, subject_area.name)
            for subject_area in subject_areas
        ]
        # Pass the value as a logging argument so it is only formatted when
        # INFO is enabled; the emitted message text is unchanged.
        logger.info(
            "self.top_level_subject_areas=%r", self.top_level_subject_areas
        )

        self.labels: dict[str, str] = {
            urn: label for urn, label in self.top_level_subject_areas
        }

    def get_label(self, urn: str) -> str:
        """Return the label for ``urn``, or ``urn`` itself when it is unknown."""
        return self.labels.get(urn, urn)
32 changes: 17 additions & 15 deletions home/service/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,26 @@

from data_platform_catalogue.entities import FindMoJdataEntityMapper, Mappers
from data_platform_catalogue.search_types import (
DomainOption,
MultiSelectFilter,
SearchResponse,
SortOption,
SubjectAreaOption,
)
from django.conf import settings
from django.core.paginator import Paginator
from nltk.stem import PorterStemmer

from home.forms.search import SearchForm
from home.models.domain_model import DomainModel
from home.models.subject_area_taxonomy import SubjectAreaTaxonomy

from .base import GenericService
from .domain_fetcher import DomainFetcher
from .subject_area_fetcher import SubjectAreaFetcher


class SearchService(GenericService):
def __init__(self, form: SearchForm, page: str, items_per_page: int = 20):
domains: list[DomainOption] = DomainFetcher().fetch()
self.domain_model = DomainModel(domains)
subject_areas: list[SubjectAreaOption] = SubjectAreaFetcher().fetch()
self.subject_area_taxonomy = SubjectAreaTaxonomy(subject_areas)
self.stemmer = PorterStemmer()
self.form = form
if self.form.is_bound:
Expand Down Expand Up @@ -79,16 +79,16 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse:
else "ascending"
)

domain = form_data.get("domain", "")
subject_area = form_data.get("domain", "")
tags = form_data.get("tags", "")
where_to_access = self._build_custom_property_filter(
"dc_where_to_access_dataset=", form_data.get("where_to_access", [])
)
entity_types = self._build_entity_types(form_data.get("entity_types", []))

filter_value = []
if domain:
filter_value.append(MultiSelectFilter("domains", [domain]))
if subject_area:
filter_value.append(MultiSelectFilter("domains", [subject_area]))
if where_to_access:
filter_value.append(MultiSelectFilter("customProperties", where_to_access))
if tags:
Expand Down Expand Up @@ -122,13 +122,15 @@ def _get_paginator(self, items_per_page: int) -> Paginator:

def _generate_remove_filter_hrefs(self) -> dict[str, dict[str, str]] | None:
if self.form.is_bound:
domain = self.form.cleaned_data.get("domain", "")
subject_area = self.form.cleaned_data.get("domain", "")
entity_types = self.form.cleaned_data.get("entity_types", [])
where_to_access = self.form.cleaned_data.get("where_to_access", [])
tags = self.form.cleaned_data.get("tags", [])
remove_filter_hrefs = {}
if domain:
remove_filter_hrefs["Subject area"] = self._generate_domain_clear_href()
if subject_area:
remove_filter_hrefs["Subject area"] = (
self._generate_subject_area_clear_href()
)
if entity_types:
entity_types_clear_href = {}
for entity_type in entity_types:
Expand Down Expand Up @@ -161,17 +163,17 @@ def _generate_remove_filter_hrefs(self) -> dict[str, dict[str, str]] | None:

return remove_filter_hrefs

def _generate_domain_clear_href(
def _generate_subject_area_clear_href(
self,
) -> dict[str, str]:
domain = self.form.cleaned_data.get("domain", "")
subject_area = self.form.cleaned_data.get("domain", "")

label = self.domain_model.get_label(domain)
label = self.subject_area_taxonomy.get_label(subject_area)

return {
label: (
self.form.encode_without_filter(
filter_name="domain", filter_value=domain
filter_name="domain", filter_value=subject_area
)
)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from data_platform_catalogue.search_types import DomainOption
from data_platform_catalogue.search_types import SubjectAreaOption
from django.core.cache import cache

from .base import GenericService


class DomainFetcher(GenericService):
class SubjectAreaFetcher(GenericService):
"""
SubjectAreaFetcher implementation to fetch subject areas with the total
number of associated entities from the backend.
Expand All @@ -16,7 +16,7 @@ def __init__(self, filter_zero_entities: bool = True):
self.cache_timeout_seconds = 300
self.filter_zero_entities = filter_zero_entities

def fetch(self) -> list[DomainOption]:
def fetch(self) -> list[SubjectAreaOption]:
"""
Fetch a static list of options that is independent of the search query
and any applied filters. Values are cached for 5 minutes to avoid
Expand All @@ -29,5 +29,5 @@ def fetch(self) -> list[DomainOption]:
cache.set(self.cache_key, result, timeout=self.cache_timeout_seconds)

if self.filter_zero_entities:
result = [domain for domain in result if domain.total > 0]
result = [subject_area for subject_area in result if subject_area.total > 0]
return result
14 changes: 7 additions & 7 deletions home/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
PublicationDatasetEntityMapping,
TableEntityMapping,
)
from data_platform_catalogue.search_types import DomainOption
from data_platform_catalogue.search_types import SubjectAreaOption
from django.conf import settings
from django.http import Http404, HttpResponse, HttpResponseBadRequest
from django.shortcuts import render
Expand All @@ -31,10 +31,10 @@
DatabaseDetailsCsvFormatter,
DatasetDetailsCsvFormatter,
)
from home.service.domain_fetcher import DomainFetcher
from home.service.glossary import GlossaryService
from home.service.metadata_specification import MetadataSpecificationService
from home.service.search import SearchService
from home.service.subject_area_fetcher import SubjectAreaFetcher

type_details_map = {
TableEntityMapping.url_formatted: DatasetDetailsService,
Expand All @@ -49,10 +49,10 @@
@cache_control(max_age=300, private=True)
def home_view(request):
"""
Displys only domains that have entities tagged for display in the catalog.
Displays only subject areas that have entities tagged for display in the catalog.
"""
domains: list[DomainOption] = DomainFetcher().fetch()
context = {"domains": domains, "h1_value": "Home"}
subject_areas: list[SubjectAreaOption] = SubjectAreaFetcher().fetch()
context = {"domains": subject_areas, "h1_value": "Home"}
return render(request, "home.html", context)


Expand Down Expand Up @@ -130,7 +130,7 @@ def metadata_specification_view(request):


def cookies_view(request):
valid_domains = [
valid_subject_areas = [
urlparse(origin).netloc for origin in settings.CSRF_TRUSTED_ORIGINS
]
referer = request.META.get("HTTP_REFERER")
Expand All @@ -139,7 +139,7 @@ def cookies_view(request):
referer_domain = urlparse(referer).netloc

# Validate this referer domain against declared valid domains
if referer_domain not in valid_domains:
if referer_domain not in valid_subject_areas:
referer = "/" # Set to home page if invalid

context = {
Expand Down
57 changes: 28 additions & 29 deletions lib/datahub-client/data_platform_catalogue/client/datahub_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,6 @@
from importlib.resources import files
from typing import Sequence

from datahub.configuration.common import ConfigurationError
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
DatasetSubTypes,
)
from datahub.metadata import schema_classes
from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance
from datahub.metadata.schema_classes import (
ChangeTypeClass,
ContainerClass,
ContainerPropertiesClass,
DatasetPropertiesClass,
DomainPropertiesClass,
DomainsClass,
OtherSchemaClass,
OwnerClass,
OwnershipClass,
OwnershipTypeClass,
SchemaFieldClass,
SchemaFieldDataTypeClass,
SchemaMetadataClass,
SubTypesClass,
)

from data_platform_catalogue.client.exceptions import (
AspectDoesNotExist,
ConnectivityError,
Expand Down Expand Up @@ -63,10 +36,36 @@
TableEntityMapping,
)
from data_platform_catalogue.search_types import (
DomainOption,
MultiSelectFilter,
SearchResponse,
SortOption,
SubjectAreaOption,
)
from datahub.configuration.common import ConfigurationError
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
DatasetSubTypes,
)
from datahub.metadata import schema_classes
from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance
from datahub.metadata.schema_classes import (
ChangeTypeClass,
ContainerClass,
ContainerPropertiesClass,
DatasetPropertiesClass,
DomainPropertiesClass,
DomainsClass,
OtherSchemaClass,
OwnerClass,
OwnershipClass,
OwnershipTypeClass,
SchemaFieldClass,
SchemaFieldDataTypeClass,
SchemaMetadataClass,
SubTypesClass,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -222,7 +221,7 @@ def list_domains(
query: str = "*",
filters: Sequence[MultiSelectFilter] | None = None,
count: int = 1000,
) -> list[DomainOption]:
) -> list[SubjectAreaOption]:
"""
Returns a list of SubjectAreaOption objects
"""
Expand Down
Loading

0 comments on commit 57fc607

Please sign in to comment.