Skip to content

Commit

Permalink
Gsren 237 xsl transform (#36)
Browse files Browse the repository at this point in the history
* enable xslt transformations

* apply review comments

* output transformations in logs
  • Loading branch information
mki-c2c authored Feb 18, 2025
1 parent b8a7347 commit dc3a356
Show file tree
Hide file tree
Showing 11 changed files with 340 additions and 17 deletions.
19 changes: 19 additions & 0 deletions backend/dev_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ sources:
api_url: "https://demo.georchestra.org/geonetwork/srv/api"
login: "${DEMO_LOGIN}"
password: "${DEMO_LOGIN}"
transformations:
- public_to_prod
- name: "GeonetworkDemoAnonymous"
api_url: "https://demo.georchestra.org/geonetwork/srv/api"
transformations:
- public_to_prod
- prod_to_final_prod
- name: "GeonetworkRennes"
api_url: "https://public.sig.rennesmetropole.fr/geonetwork/srv/api"
login: "${XXX}"
Expand All @@ -26,6 +33,8 @@ destinations:
api_url: "http://proxy:8080/geonetwork/srv/api"
geoserver:
url: "http://proxy:8080/geoserver"
transformations:
- prod_to_final_prod
"CompoLocaleMaelstro":
login: "tmaelstro"
password: "tmaelstro"
Expand All @@ -34,6 +43,16 @@ destinations:
geoserver:
url: "http://proxy:8080/geoserver"


transformations:
public_to_prod:
xsl_path: ./tests/test_public_to_prod.xsl
description: "Transformation from public geoserver to prod server & remove blogs"
prod_to_final_prod:
xsl_path: ./tests/test_prod_to_final_prod.xsl
description: "Transformation from prod server to final prod"


db_logging:
host: "database"
port: "5432"
Expand Down
26 changes: 26 additions & 0 deletions backend/maelstro/config/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import re
import yaml
from functools import cache
from typing import Any
from maelstro.common.types import Credentials, DbConfig

Expand Down Expand Up @@ -79,6 +80,31 @@ def has_db_logging(self) -> bool:
def get_db_config(self) -> DbConfig:
return DbConfig(**self.config.get("db_logging", {}))

def get_transformations(self) -> dict[str, Any]:
    """
    Return the ``transformations`` section of the loaded configuration.

    :returns: the value stored under the ``transformations`` key, or an empty
        dict when the configuration has no such key
    """
    registered = self.config.get("transformations", {})
    return registered  # type: ignore

def get_transformation_pair(self, src: str, dst: str) -> list[dict[str, Any]]:
    """
    Look up the transformation chain configured for one source/destination pair.

    :param src: name of the source instance
    :param dst: name of the destination
    :returns: the entry stored under ``"<src> -> <dst>"`` in the mapping
        returned by ``get_all_transformation_pairs()``, or an empty list when
        that key is absent
    """
    all_pairs = self.get_all_transformation_pairs()
    pair_key = f"{src} -> {dst}"
    return all_pairs.get(pair_key, [])  # type: ignore

@cache  # pylint: disable=method-cache-max-size-none
def get_all_transformation_pairs(self) -> dict[str, Any]:
    """
    Build the chain of transformations for every source/destination pair.

    For each geonetwork source instance and each destination, the
    transformations listed on the source come first, followed by the ones
    listed on the destination. Pairs whose chain is empty are left out.

    The result is memoized by ``functools.cache``, so the configuration is
    only walked on the first successful call for a given instance.

    :returns: mapping of ``"<source name> -> <destination name>"`` to the list
        of transformation definitions taken from ``get_transformations()``
    :raises ValueError: if a source or destination references a transformation
        name that is not registered
    """
    # Loop-invariant: fetch the registry once instead of once per name.
    registered = self.get_transformations()

    def resolve(owner: str, instance: dict[str, Any]) -> list[Any]:
        # ``or []`` also covers a key that is present but left empty in the YAML.
        names = instance.get("transformations") or []
        unknown = [name for name in names if name not in registered]
        if unknown:
            # Fail with an explicit message rather than storing ``None``
            # entries, which would only surface later as a TypeError when a
            # caller reads ``xsl_path`` from them.
            raise ValueError(
                f"Unknown transformation(s) {unknown} referenced by {owner}"
            )
        return [registered[name] for name in names]

    destination_chains = {
        dst_name: resolve(f"destination '{dst_name}'", dst)
        for dst_name, dst in self.config["destinations"].items()
    }

    pairs: dict[str, Any] = {}
    for src in self.config["sources"]["geonetwork_instances"]:
        src_name = src.get("name")
        source_chain = resolve(f"source '{src_name}'", src)
        for dst_name, destination_chain in destination_chains.items():
            # Concatenation creates a fresh list per pair (no shared aliases).
            chain = source_chain + destination_chain
            if chain:
                pairs[f"{src_name} -> {dst_name}"] = chain
    return pairs

def get_access_info(
self, is_src: bool, is_geonetwork: bool, instance_id: str
) -> dict[str, Any]:
Expand Down
27 changes: 16 additions & 11 deletions backend/maelstro/core/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,18 +158,23 @@ def _clone_dataset(self, output_format: str) -> str | list[Any]:
self.clone_layers()

if self.copy_meta:
mapping: dict[str, list[str]] = {
"sources": config.get_gs_sources(),
"destinations": [src["gs_url"] for src in config.get_destinations()],
}
pre_info, post_info = self.meta.update_geoverver_urls(mapping)
self.geo_hnd.log_handler.responses.append(
{
"operation": "Update of geoserver links in zip archive",
"before": pre_info,
"after": post_info,
}
xsl_transformations = config.get_transformation_pair(
self.src_name, self.dst_name
)
if xsl_transformations:
transformation_paths = [
trans["xsl_path"] for trans in xsl_transformations
]

pre_info, post_info = self.meta.apply_xslt_chain(transformation_paths)
self.geo_hnd.log_handler.responses.append(
{
"operation": "Apply XSL transformations in zip archive",
"transformations": xsl_transformations,
"before": pre_info,
"after": post_info,
}
)
self.geo_hnd.log_handler.properties["dst_title"] = self.meta.get_title()
results = self.gn_dst.put_record_zip(BytesIO(self.meta.get_zip()))
self.geo_hnd.log_handler.responses.append(
Expand Down
22 changes: 22 additions & 0 deletions backend/maelstro/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,26 @@ async def debug_page(request: Request) -> dict[str, Any]:

@app.get("/check_config")
def check_config(check_credentials: bool = True) -> dict[str, bool]:
    """
    Placeholder for a configuration validation endpoint (not implemented yet).

    Planned checks:
    - path of the config file (to be confirmed: possible security issue)
    - the format of the file is correct
    - all mandatory information is given
    - report which default values are used

    For now the response is a fixed payload that only echoes the
    ``check_credentials`` flag.

    :param check_credentials: if true, all configured credentials are meant to
        be confirmed as valid via a test connection to each server
    """
    # TODO: implement check of all servers configured in the config file
    report = {"test_conf.yaml": True}
    report["check_credentials"] = check_credentials
    return report


@app.get("/sources")
def get_sources() -> list[dict[str, str]]:
    """
    List all the geonetwork source servers registered in the config file.

    :returns: the list produced by ``config.get_gn_sources()``; each entry
        describes one server with the name defined in the config file and its
        API URL
    """
    gn_sources = config.get_gn_sources()
    return gn_sources


Expand All @@ -118,6 +132,14 @@ def get_destinations() -> list[dict[str, str]]:
return config.get_destinations()


@app.get("/transformations")
def get_transformations(with_src_dst: bool = False) -> dict[str, Any]:
    """
    List the transformations registered in the config file.

    :param with_src_dst: if true, the response additionally contains one key
        per source/destination pair, as returned by
        ``config.get_all_transformation_pairs()``
    :returns: a dict whose ``"Registered transformations"`` key holds the
        registered transformations, optionally merged with the per-pair entries
    """
    payload: dict[str, Any] = {
        "Registered transformations": config.get_transformations()
    }
    if with_src_dst:
        # Pair keys are merged at the top level, next to the registry entry.
        payload.update(config.get_all_transformation_pairs())
    return payload


@app.post("/search/{src_name}")
def post_search(
src_name: str, search_query: Annotated[SearchQuery, Body()]
Expand Down
52 changes: 48 additions & 4 deletions backend/maelstro/metadata/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,35 @@ def get_gslayer_from_gn_link(
workspace_name=ows_url.lstrip("/").split("/")[0], layer_name=layer_name
)

def update_geoverver_urls(self, mapping: dict[str, list[str]]) -> tuple[str, str]:
def _apply_xslt(self, xslt_path: str) -> bytes:
    """
    Run one XSL stylesheet against the current XML document.

    ``self.xml_bytes`` is only read here; storing the result is left to the
    caller.

    :param xslt_path: path of the stylesheet file
    :returns: the transformed document, serialized by ``dump_tree_to_bytes``
    """
    # Binary mode on purpose: the XML parser then decodes the stylesheet
    # according to its own XML declaration instead of a forced UTF-8 text
    # decode done by Python before the parser sees the data.
    with open(xslt_path, "rb") as xslt_file:
        stylesheet = etree.parse(xslt_file)
    transform = etree.XSLT(stylesheet)
    document = etree.parse(BytesIO(self.xml_bytes))
    return self.dump_tree_to_bytes(transform(document))

def apply_xslt(self, xslt_path: str) -> tuple[str, str]:
    """
    Transform the XML document in place with a single stylesheet.

    :param xslt_path: path of the stylesheet file
    :returns: two short messages giving the size of ``self.xml_bytes`` before
        and after the transformation
    """
    pre = len(self.xml_bytes)
    transformed = self._apply_xslt(xslt_path)
    self.xml_bytes = transformed
    post = len(transformed)
    return f"Before: {pre} bytes", f"After: {post} bytes"

def apply_xslt_chain(self, xslt_paths: list[str]) -> tuple[str, str]:
    """
    Transform the XML document in place with several stylesheets in sequence.

    Each stylesheet works on the output of the previous one.

    :param xslt_paths: stylesheet paths, in the order they are applied
    :returns: two short messages giving the size of ``self.xml_bytes`` before
        the first and after the last transformation
    """
    pre = len(self.xml_bytes)
    current = self.xml_bytes
    for step_path in xslt_paths:
        current = self._apply_xslt(step_path)
        # Stored after every step because _apply_xslt reads self.xml_bytes.
        self.xml_bytes = current
    post = len(current)
    return f"Before: {pre} bytes", f"After: {post} bytes"

def dump_tree_to_bytes(self, xml_root: etree._ElementTree) -> bytes:
    """
    Serialize a tree into bytes.

    :param xml_root: tree object; its ``write`` method is called with an
        in-memory buffer
    :returns: everything ``write`` put into the buffer
    """
    buffer = BytesIO()
    xml_root.write(buffer)
    return buffer.getvalue()

def replace_geoserver_src_by_dst_urls(
self, mapping: dict[str, list[str]]
) -> tuple[str, str]:
xml_root = etree.parse(BytesIO(self.xml_bytes))
for url_node in xml_root.findall(
f".//{self.prefix}:CI_OnlineResource/{self.prefix}:linkage/",
Expand All @@ -96,7 +124,7 @@ def update_geoverver_urls(self, mapping: dict[str, list[str]]) -> tuple[str, str
pre = len(self.xml_bytes)
self.xml_bytes = b_io.read()
post = len(self.xml_bytes)
return f"Before: {pre} bytes", f"Before: {post} bytes"
return f"Before: {pre} bytes", f"After: {post} bytes"

def is_ogc_layer(self, link_node: etree._Element) -> bool:
link_protocol = self.protocol_from_link(link_node)
Expand Down Expand Up @@ -147,8 +175,24 @@ def __init__(self, zipfile: bytes):

super().__init__(xml_bytes, schema)

def update_geoverver_urls(self, mapping: dict[str, list[str]]) -> tuple[str, str]:
super().update_geoverver_urls(mapping)
def replace_geoserver_src_by_dst_urls(
    self, mapping: dict[str, list[str]]
) -> tuple[str, str]:
    """
    Run the parent implementation, then call ``update_zip()``.

    :param mapping: passed unchanged to the parent implementation
    :returns: the value returned by the parent implementation
    """
    size_report = super().replace_geoserver_src_by_dst_urls(mapping)
    self.update_zip()
    return size_report

def apply_xslt(self, xslt_path: str) -> tuple[str, str]:
    """
    Run the parent ``apply_xslt``, then call ``update_zip()``.

    :param xslt_path: path of the stylesheet file
    :returns: the value returned by the parent implementation
    """
    size_report = super().apply_xslt(xslt_path)
    self.update_zip()
    return size_report

def apply_xslt_chain(self, xslt_paths: list[str]) -> tuple[str, str]:
    """
    Run the parent ``apply_xslt_chain``, then call ``update_zip()``.

    :param xslt_paths: stylesheet paths, in the order they are applied
    :returns: the value returned by the parent implementation
    """
    size_report = super().apply_xslt_chain(xslt_paths)
    self.update_zip()
    return size_report

def update_zip(self) -> tuple[str, str]:
new_bytes = BytesIO(b"")
with ZipFile(BytesIO(self.zipfile), "r") as zf_src:
# get compression type from non directory elements of zip archive
Expand Down
2 changes: 1 addition & 1 deletion backend/tests/test_API.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_search5():
"size": 5
})
assert len(response.json()['hits']['hits']) == 5
assert response.json()['hits']['total']['value'] == 388
assert response.json()['hits']['total']['value'] > 5


@pytest.mark.skip("test depends on existing workspace 'PSC', so it does not work in CI yet")
Expand Down
23 changes: 23 additions & 0 deletions backend/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

os.environ["CONFIG_PATH"] = os.path.join(os.path.dirname(__file__), "test_config.yaml")
os.environ["DB_CONFIG_PATH"] = os.path.join(os.path.dirname(__file__), "test_db_config.yaml")
os.environ["XSLT_CONFIG_PATH"] = os.path.join(os.path.dirname(__file__), "test_xslst_config.yaml")


def test_init():
Expand Down Expand Up @@ -128,6 +129,28 @@ def test_get_info():
conf.get_access_info(False, False, "MissingKey")


def test_xslts():
    """Check the source/destination transformation chains built from the XSLT test config."""
    conf = Config("XSLT_CONFIG_PATH")
    assert set(conf.get_all_transformation_pairs().keys()) == {
        'GeonetworkMaster -> PlateformeProfessionnelle',
        'GeonetworkMaster -> PlateformePublique',
        'GeonetworkMaster -> CompoLocale',
        'GeonetworkRennes -> PlateformePublique',
        'GeonetworkRennes -> CompoLocale'
    }
    assert len(conf.get_transformation_pair("GeonetworkMaster", "CompoLocale")) == 2
    assert len(conf.get_transformation_pair("GeonetworkRennes", "CompoLocale")) == 1
    # The destination name must be spelled exactly as in the config: a
    # misspelled name returns [] for any pair, which would make this
    # assertion pass without checking anything.
    assert len(conf.get_transformation_pair("GeonetworkRennes", "PlateformeProfessionnelle")) == 0
    assert [
        el["xsl_path"]
        for el in conf.get_transformation_pair("GeonetworkMaster", "CompoLocale")
    ] == ["./tests/test_public_to_prod.xsl", "./tests/test_xslt_prod_to_final_prod.xsl"]
    assert [
        el["xsl_path"]
        for el in conf.get_transformation_pair("GeonetworkRennes", "PlateformePublique")
    ] == ["./tests/test_xslt_prod_to_final_prod.xsl", "./tests/test_public_to_prod.xsl"]


def test_doc_sample():
os.environ["SAMPLE_PATH"] = os.path.join(os.path.dirname(__file__), "doc_sample_config.yaml")
os.environ["PASSWORD_B"] = "pwB"
Expand Down
27 changes: 26 additions & 1 deletion backend/tests/test_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,36 @@ def test_replace_geoserver():
with open(os.path.join(os.path.dirname(__file__), 'demo_iso19139.zip'), 'rb') as zf:
mm = Meta(zf.read())
assert mm.xml_bytes.find(b"https://public.sig.rennesmetropole.fr/geoserver") >= 0
mm.update_geoverver_urls(
mm.replace_geoserver_src_by_dst_urls(
{
"sources": ["https://public.sig.rennesmetropole.fr/geoserver"],
"destinations": ["https://prod.sig.rennesmetropole.fr/geoserver"],
}
)
assert mm.xml_bytes.find(b"https://public.sig.rennesmetropole.fr/geoserver") == -1
assert mm.xml_bytes.find(b"https://prod.sig.rennesmetropole.fr/geoserver") >= 0


def test_xslt():
    """Apply test_public_to_prod.xsl to the demo record and check the resulting XML."""
    here = os.path.dirname(__file__)
    gml_link_title = "Lien de téléchargement direct (GML3 EPSG:3948)".encode()

    with open(os.path.join(here, 'demo_iso19139.zip'), 'rb') as zf:
        mm = Meta(zf.read())
    # State before the transformation
    assert b"https://public.sig.rennesmetropole.fr/geoserver" in mm.xml_bytes
    assert gml_link_title in mm.xml_bytes

    mm.apply_xslt(os.path.join(here, "test_public_to_prod.xsl"))

    # State after the transformation
    assert b"https://prod.sig.rennesmetropole.fr/geoserver" in mm.xml_bytes
    assert gml_link_title not in mm.xml_bytes


def test_xslt_chain():
    """Apply both test stylesheets in sequence to the demo record and check the resulting XML."""
    here = os.path.dirname(__file__)
    gml_link_title = "Lien de téléchargement direct (GML3 EPSG:3948)".encode()

    with open(os.path.join(here, 'demo_iso19139.zip'), 'rb') as zf:
        mm = Meta(zf.read())
    # State before the transformations
    assert b"https://public.sig.rennesmetropole.fr/geoserver" in mm.xml_bytes
    assert gml_link_title in mm.xml_bytes

    stylesheet_names = ("test_public_to_prod.xsl", "test_prod_to_final_prod.xsl")
    mm.apply_xslt_chain([os.path.join(here, name) for name in stylesheet_names])

    # State after the transformations
    assert b"https://final_prod.sig.rennesmetropole.fr/geoserver" in mm.xml_bytes
    assert gml_link_title not in mm.xml_bytes
45 changes: 45 additions & 0 deletions backend/tests/test_prod_to_final_prod.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Test stylesheet: copies the input document and rewrites the host prefix of
URL elements.

Every node and attribute is copied as is, except elements whose qualified
name equals the URL parameter (gmd:URL) and whose text contains
urlPrefixSearchPortail: for those, the content becomes urlPrefixCiblePortail
followed by whatever came after the search prefix.
-->
<xsl:stylesheet
version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:gmd="http://www.isotc211.org/2005/gmd"
xmlns:srv="http://www.isotc211.org/2005/srv"
xmlns:gco="http://www.isotc211.org/2005/gco"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:gmx="http://www.isotc211.org/2005/gmx"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:gfc="http://www.isotc211.org/2005/gfc"
xmlns:gts="http://www.isotc211.org/2005/gts"
xmlns:gml="http://www.opengis.net/gml"
xmlns:geonet="http://www.fao.org/geonetwork">

<xsl:output omit-xml-declaration="yes" indent="yes"/>
<!-- Qualified name (prefix included) of the elements whose content is rewritten -->
<xsl:param name="URL">gmd:URL</xsl:param>
<!-- URL prefix to look for / URL prefix that replaces it -->
<xsl:param name="urlPrefixSearchPortail">https://prod.sig.rennesmetropole.fr</xsl:param>
<xsl:param name="urlPrefixCiblePortail">https://final_prod.sig.rennesmetropole.fr</xsl:param>

<!-- Single template matching every attribute and node -->
<xsl:template match="@* | node()">
<xsl:copy>


<xsl:choose>
<xsl:when test="name()=$URL">
<!-- Step 1
Public portal: rewrite the URLs of the WMS and WFS capabilities
from portail.sig to public.sig.
NOTE(review): the host names in this comment do not match the parameters
above (prod.sig / final_prod.sig); the comment looks inherited from another
stylesheet. The prefixes actually used are the two parameters. -->
<!-- Matching prefix: only the rewritten text is emitted; attributes of the
URL element are not copied in this branch -->
<xsl:if test="(contains(text(), $urlPrefixSearchPortail))">
<xsl:value-of select="concat($urlPrefixCiblePortail,substring-after(text(),$urlPrefixSearchPortail))">
</xsl:value-of>
</xsl:if>
<!-- No matching prefix: keep the element content unchanged -->
<xsl:if test="not(contains(text(), $urlPrefixSearchPortail))">
<xsl:apply-templates select="@* | node()"/>
</xsl:if>
</xsl:when>
<xsl:otherwise>
<!-- Any other node: recurse into attributes and children -->
<xsl:apply-templates select="@* | node()"/>
</xsl:otherwise>
</xsl:choose>
</xsl:copy>
</xsl:template>

</xsl:stylesheet>
Loading

0 comments on commit dc3a356

Please sign in to comment.