Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

COPDS-1672: Template-like json for catalogue manager #137

Merged
merged 7 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions alembic/versions/e5875ac4a047_save_contents_configuration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""save contents configuration.

Revision ID: e5875ac4a047
Revises: 694fde86c48c
Create Date: 2024-12-05 14:20:53.059624

"""

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql as dialect_postgresql

from alembic import op

# revision identifiers, used by Alembic.
revision = "e5875ac4a047"
down_revision = "694fde86c48c"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the ``contents_config`` JSONB column to ``catalogue_updates``.

    ``default={}`` is only a Python-side default applied by the ORM on INSERT;
    it is not part of the emitted ``ALTER TABLE ... ADD COLUMN`` statement.
    A ``server_default`` is therefore added too, so rows already present in
    the table (and rows inserted outside the ORM) get an empty JSON object
    instead of NULL.
    """
    op.add_column(
        "catalogue_updates",
        sa.Column(
            "contents_config",
            dialect_postgresql.JSONB,
            default={},
            server_default=sa.text("'{}'::jsonb"),
        ),
    )


def downgrade() -> None:
    """Revert this revision by dropping ``contents_config`` from ``catalogue_updates``."""
    op.drop_column(
        table_name="catalogue_updates",
        column_name="contents_config",
    )
62 changes: 52 additions & 10 deletions cads_catalogue/contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@

import sqlalchemy as sa
import structlog
import yaml

from cads_catalogue import config, database, layout_manager, object_storage
from cads_catalogue import config, database, layout_manager, object_storage, utils

THIS_PATH = os.path.abspath(os.path.dirname(__file__))
logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -106,23 +107,31 @@ def content_sync(
return db_content


def load_content_folder(content_folder: str | pathlib.Path) -> List[dict[str, Any]]:
def load_content_folder(
content_folder: str | pathlib.Path, global_context: dict[str, Any] | None = None
) -> List[dict[str, Any]]:
"""
Parse folder and returns a list of metadata dictionaries, each one for a content.

Parameters
----------
content_folder: folder path containing content files
global_context: dictionary to be used for rendering templates

Returns
-------
list of dictionaries of information parsed.
"""
if global_context is None:
global_context = dict()
metadata_file_path = os.path.join(content_folder, "metadata.json")
with open(metadata_file_path) as fp:
data = json.load(fp)
data_raw = json.load(fp)
ret_value = []
for site in data["site"]:
for site in data_raw["site"][:]:
site_context = global_context.get("default", dict())
site_context.update(global_context.get(site, dict()))
data = utils.dict_render(data_raw, site_context)
metadata = {
"site": site,
"type": data["resource_type"],
Expand Down Expand Up @@ -158,6 +167,7 @@ def load_content_folder(content_folder: str | pathlib.Path) -> List[dict[str, An
def transform_layout(
content: dict[str, Any],
storage_settings: config.ObjectStorageSettings,
global_context: dict[str, Any] | None = None,
):
"""
Modify layout.json information inside content metadata, with related uploads to the object storage.
Expand All @@ -166,13 +176,17 @@ def transform_layout(
----------
content: metadata of a loaded content from files
storage_settings: object with settings to access the object storage
global_context: dictionary to be used for rendering templates

Returns
-------
modified version of input resource metadata
"""
if not content.get("layout"):
return content
site, ctype, slug = content["site"], content["type"], content["slug"]
if global_context is None:
global_context = dict()
layout_file_path = content["layout"]
if not os.path.isfile(layout_file_path):
return content
Expand All @@ -181,10 +195,14 @@ def transform_layout(
layout_data = json.load(fp)
logger.debug(f"input layout_data: {layout_data}")

layout_data = layout_manager.transform_html_blocks(layout_data, layout_folder_path)
layout_raw_data = layout_manager.transform_html_blocks(
layout_data, layout_folder_path
)
site_context = global_context.get("default", dict())
site_context.update(global_context.get(site, dict()))
layout_data = utils.dict_render(layout_raw_data, site_context)

logger.debug(f"output layout_data: {layout_data}")
site, ctype, slug = content["site"], content["type"], content["slug"]
subpath = os.path.join("contents", site, ctype, slug)
content["layout"] = layout_manager.store_layout_by_data(
layout_data, content, storage_settings, subpath=subpath
Expand All @@ -193,13 +211,34 @@ def transform_layout(
return content


def load_contents(contents_root_folder: str | pathlib.Path) -> List[dict[str, Any]]:
def yaml2context(yaml_path: str | pathlib.Path | None) -> dict[str, Any]:
"""
load yaml used for rendering templates.

:param yaml_path: yaml path
:return: yaml parsed
"""
if not yaml_path:
return dict()
if not os.path.isfile(yaml_path):
logger.warning(f"{yaml_path} not found. No variable substitution in templates.")
return dict()
with open(yaml_path) as fp:
data = yaml.load(fp.read(), Loader=yaml.loader.BaseLoader)
return data


def load_contents(
contents_root_folder: str | pathlib.Path,
global_context: dict[str, Any] | None = None,
) -> List[dict[str, Any]]:
"""
Load all contents from a folder and return a dictionary of metadata extracted.

Parameters
----------
contents_root_folder: root path where to look for contents (i.e. cads-contents-json root folder)
global_context: dictionary to be used for rendering templates

Returns
-------
Expand All @@ -218,7 +257,7 @@ def load_contents(contents_root_folder: str | pathlib.Path) -> List[dict[str, An
logger.warning("unknown file %r found" % content_folder)
continue
try:
contents_md = load_content_folder(content_folder)
contents_md = load_content_folder(content_folder, global_context)
except: # noqa
logger.exception(
"failed parsing content in %s, error follows" % content_folder
Expand All @@ -233,6 +272,7 @@ def update_catalogue_contents(
contents_package_path: str | pathlib.Path,
storage_settings: config.ObjectStorageSettings,
remove_orphans: bool = True,
yaml_path: str | pathlib.Path | None = None,
):
"""
Load metadata of contents from files and sync each content in the db.
Expand All @@ -243,20 +283,22 @@ def update_catalogue_contents(
contents_package_path: root folder path of the contents package (i.e. cads-contents-json root folder)
storage_settings: object with settings to access the object storage
remove_orphans: if True, remove from the database other contents not involved (default True)
yaml_path: path to yaml file containing variables to be rendered in the json files

Returns
-------
list: list of (site, type, slug) of contents involved
"""
contents = load_contents(contents_package_path)
global_context = yaml2context(yaml_path)
contents = load_contents(contents_package_path, global_context)
logger.info(
"loaded %s contents from folder %s" % (len(contents), contents_package_path)
)
involved_content_props = []
for content in contents[:]:
site, ctype, slug = content["site"], content["type"], content["slug"]
involved_content_props.append((site, ctype, slug))
content = transform_layout(content, storage_settings)
content = transform_layout(content, storage_settings, global_context)
try:
with session.begin_nested():
content_sync(session, content, storage_settings)
Expand Down
1 change: 1 addition & 0 deletions cads_catalogue/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class CatalogueUpdate(BaseModel):
cim_repo_commit = sa.Column(sa.String)
content_repo_commit = sa.Column(sa.String)
override_md = sa.Column(dialect_postgresql.JSONB, default={})
contents_config = sa.Column(dialect_postgresql.JSONB, default={})


class Content(BaseModel):
Expand Down
17 changes: 15 additions & 2 deletions cads_catalogue/entry_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def update_catalogue(
contents_folder_path: Optional[
str
] = None, # os.path.join(PACKAGE_DIR, "cads-contents-json"),
contents_config_path: Optional[str] = None,
connection_string: Optional[str] = None,
force: bool = False,
delete_orphans: bool = True,
Expand All @@ -197,6 +198,7 @@ def update_catalogue(
:param licences_folder_path: folder containing metadata files for licences (i.e. cads-licences)
:param cim_folder_path: str = folder containing CIM Quality Assessment layouts (i.e. cads-forms-cim-json)
:param contents_folder_path = folder containing metadata files for contents (i.e. cads-contents-json)
:param contents_config_path = path of the file yaml containing template variables for contents
:param connection_string: something like 'postgresql://user:password@netloc:port/dbname'
:param force: if True, run update regardless input folders has no changes from last update (default False)
:param delete_orphans: if True, delete resources/licences not involved. False if using include/exclude
Expand Down Expand Up @@ -270,6 +272,11 @@ def update_catalogue(
except Exception:
logger.exception(f"not parsable {overrides_path}")
current_override_md = dict()
try:
current_contents_config = contents.yaml2context(contents_config_path)
except Exception:
logger.exception(f"not parsable {contents_config_path}")
current_contents_config = dict()
with session_obj.begin() as session: # type: ignore
logger.info("comparing current input files with the ones of the last run")
current_git_hashes = manager.get_current_git_hashes(
Expand All @@ -279,10 +286,13 @@ def update_catalogue(
session,
*[f[1] for f in paths_db_hash_map],
"override_md",
"contents_config",
)
last_run_git_hashes = last_run_status[:-1]
last_run_override_md = last_run_status[-1]
last_run_git_hashes = last_run_status[:-2]
last_run_override_md = last_run_status[-2]
last_run_contents_config = last_run_status[-1]
override_changed = current_override_md != last_run_override_md
contents_config_changed = current_contents_config != last_run_contents_config
if (
current_git_hashes == last_run_git_hashes
and not force
Expand Down Expand Up @@ -321,6 +331,7 @@ def update_catalogue(
contents_changed = (
current_git_hashes[5] != last_run_git_hashes[5]
or current_git_hashes[5] is None
or contents_config_changed
)
if this_package_changed:
logger.info(
Expand Down Expand Up @@ -401,6 +412,7 @@ def update_catalogue(
session,
contents_folder_path, # type: ignore
storage_settings,
yaml_path=contents_config_path,
)
# delete orphans
if delete_orphans: # -> always false if filtering is active
Expand Down Expand Up @@ -447,6 +459,7 @@ def update_catalogue(
else:
status_info["catalogue_repo_commit"] = current_git_hashes[0]
status_info["override_md"] = current_override_md
status_info["contents_config"] = current_contents_config
logger.info(
"db update of inputs' status (git commit hashes and override metadata)"
)
Expand Down
55 changes: 55 additions & 0 deletions cads_catalogue/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,66 @@
import pathlib
import subprocess
import urllib.parse
from string import Template
from typing import Any

from sqlalchemy import inspect


class CADSTemplate(Template):
    """
    string.Template variant replacing only braced placeholders.

    ``${name}`` is substituted, while the unbraced form ``$name`` is never
    recognised as a placeholder and is left as it is.
    """

    # braced placeholders accept the standard identifier syntax of Template
    braceidpattern = Template.idpattern
    # pattern that cannot match anything: disables the unbraced `$name` form
    idpattern = "nothing^"


def list_render(
    input_list: list[Any], context: dict[str, Any], template_class=CADSTemplate
) -> list[Any]:
    """
    build a new list whose items are the rendered items of an input list.

    Strings are rendered with `template_class(...).safe_substitute(context)`,
    dictionaries and lists are rendered recursively, any other item is kept
    unchanged. The input list is not modified.

    :param input_list: list with values to be rendered
    :param context: dictionary with values to be used for rendering
    :param template_class: class to be used for templating
    :return: new list with values rendered
    """

    def _render(element: Any) -> Any:
        # dispatch on the type of the item; unknown types pass through
        if isinstance(element, str):
            return template_class(element).safe_substitute(context)
        if isinstance(element, dict):
            return dict_render(element, context, template_class)
        if isinstance(element, list):
            return list_render(element, context, template_class)
        return element

    return [_render(element) for element in input_list]


def dict_render(
    input_dict, context: dict[str, Any], template_class=CADSTemplate
) -> dict[str, Any]:
    """
    build a new dictionary whose values are the rendered values of an input dictionary.

    Keys are copied as they are. String values are rendered with
    `template_class(...).safe_substitute(context)`, dictionaries and lists are
    rendered recursively, any other value is kept unchanged. The input
    dictionary is not modified.

    :param input_dict: dictionary with values to be rendered
    :param context: dictionary with values to be used for rendering
    :param template_class: class to be used for templating
    :return: new dictionary with values rendered
    """
    rendered: dict[str, Any] = {}
    for name, raw in input_dict.items():
        if isinstance(raw, str):
            rendered[name] = template_class(raw).safe_substitute(context)
        elif isinstance(raw, dict):
            rendered[name] = dict_render(raw, context, template_class)
        elif isinstance(raw, list):
            rendered[name] = list_render(raw, context, template_class)
        else:
            # not a string nor a container: nothing to render
            rendered[name] = raw
    return rendered


def is_url(astring):
"""Return True if `astring is parsable as a URL, False otherwise."""
result = urllib.parse.urlparse(astring)
Expand Down
21 changes: 21 additions & 0 deletions tests/data/cads-contents-json/how-to-api-templated/layout.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"title": "${siteSlug}API setup",
"description": "Access the full data store catalogue of ${siteName}, with search and availability features. Global prop: ${global_prop}",
"body": {
"main": {
"sections": [
{
"id": "main",
"blocks": [
{
"id": "page-content",
"type": "html",
"content": "<div>TODO</div>",
"content_source": "../html_block.html"
}
]
}
]
}
}
}
10 changes: 10 additions & 0 deletions tests/data/cads-contents-json/how-to-api-templated/metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"id": "how-to-api-templated",
"resource_type": "page",
"title": "${siteSlug} API setup",
"abstract": "Access ${global_prop} items of ${siteName} catalogue, with search and availability features",
"site": ["ads"],
"publication_date": "2024-09-13T10:01:50Z",
"update_date": "2024-09-16T02:10:22Z",
"layout": "./layout.json"
}
3 changes: 2 additions & 1 deletion tests/data/cads-contents-json/html_block.html
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
<p>this is a content of a html block</p>
<p>this is a content of a html block</p>
<p>${apiSnippet}</p>
17 changes: 17 additions & 0 deletions tests/data/cads-contents-json/template_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
default:
apiSnippet: |
import something
this_is_a_default_snippet
global_prop: 33
cds:
siteSlug: CDS
siteName: Climate Data Store
apiSnippet: |
import cds_stuff
this_is_cds_snippet
ads:
siteSlug: ADS
siteName: ADS Data Store
apiSnippet: |
import ads_stuff
this_is_ads_snippet
Loading
Loading