From bde2e3a8bbf77b178918acd20a72e03da33ec71d Mon Sep 17 00:00:00 2001 From: renaud gaudin Date: Thu, 21 Dec 2023 10:50:25 +0000 Subject: [PATCH] [dashboard] Added Support for ZIM-discovery On start, a new script is called to list ZIM files in expected location and updates the Packages YAML Then the gen-home script is called to generate home from (updated) Packages YAML. A healthcheck is also added to inform outside that the home is ready so other services depending on the updated Packages.yaml can start --- dashboard/Dockerfile | 28 +++++-- dashboard/entrypoint.sh | 14 ++++ dashboard/gen-home.py | 43 ++++++----- dashboard/refresh-zims.py | 150 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 212 insertions(+), 23 deletions(-) create mode 100755 dashboard/entrypoint.sh create mode 100644 dashboard/refresh-zims.py diff --git a/dashboard/Dockerfile b/dashboard/Dockerfile index 45f1c00..9107644 100644 --- a/dashboard/Dockerfile +++ b/dashboard/Dockerfile @@ -35,27 +35,45 @@ RUN \ && /usr/local/bin/gen-home_env/bin/pip3 install --no-cache-dir -U pip \ && /usr/local/bin/gen-home_env/bin/pip3 install \ --no-cache-dir \ - Jinja2==3.1.2 PyYAML==6.0.1 humanfriendly==10.0 \ + Jinja2==3.1.2 PyYAML==6.0.1 humanfriendly==10.0 libzim==3.4.0 \ && apk del curl ENV FQDN "generic.hotspot" ENV NAME "My Hotspot" +# path in which to find code (templates) +ENV SRC_DIR "/src" +# path to packages YAML file +ENV PACKAGES_PATH "/src/home.yaml" +# path to write home HTML and assets file +ENV DEST_DIR "/var/www" +# folder storing ZIM files. unless DONT_UPDATE_PACKAGES, ZimPackages not in the folder +# will be removed (disabled) from packages.yaml +# discovered ZIM (not in YAML) will be added +ENV ZIM_DIR "/data/zims" +# set to skip packages.yaml update on start (reading ZIM_DIR folder) +ENV DONT_UPDATE_PACKAGES "" + +# templates to write ZIM Package links to reader and ZIM downloads. 
+# Available patterns (to be replaced): `{fqdn}`, `{zim_name}`, `{zim_filename}` +ENV KIWIX_READER_LINK_TPL "//kiwix.{fqdn}/viewer#{zim_name}" +ENV KIWIX_DOWNLOAD_LINK_TPL "//zim-download.{fqdn}/{zim_filename}" # WARN: this break apk but saves a lot of space # it's OK on prod but comment it during dev if you need packages RUN apk del apk-tools ca-certificates-bundle -COPY Caddyfile /etc/caddy/ -COPY gen-home.py /src/ +COPY gen-home.py refresh-zims.py /src/ COPY templates /src/templates COPY assets /var/www/assets COPY fallback.html /var/www/fallback.html COPY home.yaml /src/ COPY lighttpd.conf /etc/lighttpd/ +COPY entrypoint.sh /usr/local/bin/ # store python bytecode in image -RUN /usr/local/bin/gen-home_env/bin/python3 -m compileall /src/gen-home.py && mv /src/__pycache__/*.pyc /usr/local/bin/gen-home_env/lib/ +RUN /usr/local/bin/gen-home_env/bin/python3 -m compileall /src/gen-home.py /src/refresh-zims.py && mv /src/__pycache__/*.pyc /usr/local/bin/gen-home_env/lib/ -ENTRYPOINT ["/usr/bin/dumb-init", "--", "/usr/local/bin/gen-home_env/bin/python3", "/usr/local/bin/gen-home_env/lib/gen-home.cpython-311.pyc"] +HEALTHCHECK --interval=10s --timeout=2s CMD ["/bin/ls", "/tmp/.ready"] +ENTRYPOINT ["/usr/bin/dumb-init", "--", "/usr/local/bin/entrypoint.sh"] CMD ["lighttpd", "-D", "-f", "/etc/lighttpd/lighttpd.conf"] diff --git a/dashboard/entrypoint.sh b/dashboard/entrypoint.sh new file mode 100755 index 0000000..93c904f --- /dev/null +++ b/dashboard/entrypoint.sh @@ -0,0 +1,14 @@ +#!/bin/sh +set -e + +VENV=/usr/local/bin/gen-home_env + +# refresh ZIM packages collection (maybe) +$VENV/bin/python3 $VENV/lib/refresh-zims.cpython-311.pyc + +# generate homepage from collection +$VENV/bin/python3 $VENV/lib/gen-home.cpython-311.pyc + +touch /tmp/.ready + +exec "$@" diff --git a/dashboard/gen-home.py b/dashboard/gen-home.py index 28a3b6b..e24cf61 100755 --- a/dashboard/gen-home.py +++ b/dashboard/gen-home.py @@ -1,8 +1,14 @@ #!/usr/bin/env python3 -""" Entrypoint-friendly 
static homepage generation script +""" gen-home: generate static home page from Packages YAML + + - Reads Packages YAML from `PACKAGES_PATH` + - Prepares an HTML output with templates defined in `SRC_DIR`/templates + - Writes an index.html in `DEST_DIR` + - Optionally (`DEBUG`) outputs index to stdout as well Dependencies: + - PyYAML - Jinja2 - humanfriendly """ @@ -10,35 +16,38 @@ import os import pathlib import re -import sys import traceback import urllib.parse -from typing import List, Union import humanfriendly from jinja2 import Environment, FileSystemLoader, select_autoescape -from yaml import load as yaml_load +import yaml try: - from yaml import CLoader as Loader + from yaml import CSafeLoader as SafeLoader except ImportError: - from yaml import Loader + # we don't NEED cython ext but it's faster so use it if avail. + from yaml import SafeLoader + -src_dir = pathlib.Path(os.getenv("SRC_DIR", "/src")) -dest_dir = pathlib.Path(os.getenv("DEST_DIR", "/var/www")) +src_dir = pathlib.Path(os.getenv("SRC_DIR", "/src")).expanduser().resolve() +packages_path = ( + pathlib.Path(os.getenv("PACKAGES_PATH", "home.yaml")).expanduser().resolve() +) +dest_dir = pathlib.Path(os.getenv("DEST_DIR", "/var/www")).expanduser().resolve() templates_dir = src_dir.joinpath("templates") env = Environment( loader=FileSystemLoader(templates_dir), autoescape=select_autoescape() ) -def format_fsize(size: Union[str, int]) -> str: +def format_fsize(size: str | int) -> str: if not str(size).isdigit(): - size = humanfriendly.parse_size(size) + size = humanfriendly.parse_size(str(size)) try: return humanfriendly.format_size(int(size), keep_width=False, binary=True) except Exception: - return size + return str(size) env.filters["fsize"] = format_fsize @@ -95,19 +104,21 @@ def normalize(url: str) -> str: @property def visible(self): + if self.get("disabled", False): + return False try: return all([self[key] for key in self.MANDATORY_FIELDS]) except KeyError: return False @property - def 
langs(self) -> List[str]: + def langs(self) -> list[str]: return [lang[:2] for lang in self.get("languages", [])] def gen_home(fpath: pathlib.Path): try: - document = yaml_load(fpath.read_text(), Loader=Loader) + document = yaml.load(fpath.read_text(), Loader=SafeLoader) except Exception as exc: print("[CRITICAL] unable to read home YAML document, using fallback homepage") traceback.print_exception(exc) @@ -131,12 +142,8 @@ def gen_home(fpath: pathlib.Path): if __name__ == "__main__": - gen_home(src_dir / "home.yaml") + gen_home(packages_path) if Conf.debug: with open(dest_dir / "index.html", "r") as fh: print(fh.read()) - - if len(sys.argv) < 2: - sys.exit(0) - os.execvp(sys.argv[1], sys.argv[1:]) # nosec diff --git a/dashboard/refresh-zims.py b/dashboard/refresh-zims.py new file mode 100644 index 0000000..3cbb85d --- /dev/null +++ b/dashboard/refresh-zims.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 + +""" refresh-zims: update YAML packages list using disk-discovered ZIM files + + This is an optional feature, yet it is enabled by default. + Set `DONT_UPDATE_PACKAGES` environ to disable + + Once enabled, this loops over ZIM files present on disk (at `ZIM_DIR` path) + and updates the Packages YAML (at `PACKAGES_PATH`) accordingly: + - if the package (using its ident) was in YAML: keep YAML entry + - if the ZIM was not in YAML: add entry using in-ZIM info + - entries in YAML for which ZIM is missing are disabled but kept. + + Dependencies: + - PyYAML + - libzim +""" +import base64 +import os +import pathlib +import traceback +from typing import Any + +import yaml +from libzim.reader import Archive + +try: + from yaml import CDumper as Dumper + from yaml import CSafeLoader as SafeLoader +except ImportError: + # we don't NEED cython ext but it's faster so use it if avail. 
+    from yaml import Dumper, SafeLoader
+
+# Packages YAML updated in place (same variable and default as gen-home.py)
+packages_path = (
+    pathlib.Path(os.getenv("PACKAGES_PATH", "home.yaml")).expanduser().resolve()
+)
+# folder scanned for `*.zim` files; default is the ZIM_DIR value set in the Dockerfile
+zims_dir = pathlib.Path(os.getenv("ZIM_DIR", "/data/zims"))
+# any non-empty value turns the refresh off
+dont_update = bool(os.getenv("DONT_UPDATE_PACKAGES", ""))
+
+# link templates in which `{fqdn}`, `{zim_name}` and `{zim_filename}` get replaced.
+# Defaults are the values set in the Dockerfile so that running this script
+# outside of the image produces the same links.
+kiwix_reader_link_tpl = os.getenv(
+    "KIWIX_READER_LINK_TPL", "//kiwix.{fqdn}/viewer#{zim_name}"
+)
+kiwix_download_link_tpl = os.getenv(
+    "KIWIX_DOWNLOAD_LINK_TPL", "//zim-download.{fqdn}/{zim_filename}"
+)
+
+
+def get_metadata(archive: Archive, name: str) -> str:
+    """Return metadata `name` of `archive` as text, "" if the ZIM lacks that key."""
+    if name not in archive.metadata_keys:
+        return ""
+    return archive.get_metadata(name).decode("UTF-8")
+
+
+def get_kiwix_url(template: str, fqdn: str, name: str, filename: str) -> str:
+    """Return `template` with its three known patterns substituted.
+
+    `{fqdn}`, `{zim_name}` and `{zim_filename}` are replaced literally (this is
+    not `str.format`), so any other brace in the template is left untouched.
+    """
+    return (
+        template.replace("{fqdn}", fqdn)
+        .replace("{zim_name}", name)
+        .replace("{zim_filename}", filename)
+    )
+
+
+def split_metadata(value: str, separator: str) -> list[str]:
+    """Split a multi-valued metadata string, dropping empty or blank items.
+
+    `"".split(sep)` returns `[""]` (truthy), which would defeat any
+    `or default` fallback applied to the result; this returns `[]` instead.
+    """
+    return [item.strip() for item in value.split(separator) if item.strip()]
+
+
+def get_entry_for(
+    fpath: pathlib.Path, document_metadata: dict[str, str]
+) -> dict[str, Any]:
+    """Build a Packages YAML entry from the ZIM file at `fpath`.
+
+    `document_metadata` is the `metadata` mapping of the Packages YAML; only
+    its `fqdn` key is read, to build the reader and download links.
+
+    The entry's `ident` is `publisher:name:flavour` (from ZIM metadata) and its
+    `icon` is the base64-encoded 48px illustration, or None without one.
+
+    Errors from opening or reading the ZIM are not caught here, nor is the
+    KeyError for a missing `fqdn`: the caller decides what to do with them.
+    """
+    zim = Archive(fpath)
+    publisher = get_metadata(zim, "Publisher")
+    name = get_metadata(zim, "Name")
+    flavour = get_metadata(zim, "Flavour")
+    fqdn = document_metadata["fqdn"]
+
+    icon = None
+    # membership in the available sizes is what tells a 48px illustration exists
+    if 48 in zim.get_illustration_sizes():
+        icon = base64.b64encode(bytes(zim.get_illustration_item(48).content)).decode(
+            "ASCII"
+        )
+
+    return {
+        "kind": "zim",
+        "ident": f"{publisher}:{name}:{flavour}",
+        "title": get_metadata(zim, "Title"),
+        "description": get_metadata(zim, "Description"),
+        # fallback applies when the ZIM has no (or an empty) Language
+        "languages": split_metadata(get_metadata(zim, "Language"), ",") or ["eng"],
+        "tags": split_metadata(get_metadata(zim, "Tags"), ";"),
+        "url": get_kiwix_url(
+            template=kiwix_reader_link_tpl,
+            fqdn=fqdn,
+            name=name,
+            filename=fpath.name,
+        ),
+        "download": {
+            "url": get_kiwix_url(
+                template=kiwix_download_link_tpl,
+                fqdn=fqdn,
+                name=name,
+                filename=fpath.name,
+            ),
+            "size": fpath.stat().st_size,
+        },
+        "icon": icon,
+    }
+
+
+def refresh_zims(
+    packages_path: pathlib.Path, zims_dir: pathlib.Path, debug: bool | None = False
+):
+    """Update the Packages YAML at `packages_path` from the ZIMs in `zims_dir`.
+
+    - every package already in the YAML is kept, in its original order
+    - `kind: zim` packages whose ZIM is not on disk are flagged `disabled: true`
+      (gen-home hides disabled packages) instead of being dropped
+    - `kind: zim` packages whose ZIM is on disk get their `disabled` flag
+      cleared, so a ZIM that comes back is shown again
+    - discovered ZIMs unknown to the YAML are appended, in file name order
+
+    Nothing is raised for an unreadable YAML, an unreadable ZIM or a failed
+    write: those are printed and the YAML is left as it was. With `debug`, the
+    traceback of unreadable ZIMs is printed as well.
+    """
+    print(f"refreshing ZIMs from {zims_dir=}")
+    try:
+        document = yaml.load(packages_path.read_text(), Loader=SafeLoader)
+        # bare lookups: fail early if a key the home needs is missing
+        document["metadata"]["fqdn"]
+        document["metadata"]["name"]
+        # an empty `packages:` key loads as None
+        conf_packages = list(document["packages"] or [])
+    except Exception as exc:
+        print("[CRITICAL] unable to read home YAML document, skiping")
+        traceback.print_exception(exc)
+        return
+
+    # entries built from on-disk ZIMs, by ident. sorted() because glob order is
+    # arbitrary; should two files share an ident, the first one wins.
+    discovered: dict[str, dict[str, Any]] = {}
+    for zim_fpath in sorted(zims_dir.glob("*.zim")):
+        try:
+            package = get_entry_for(zim_fpath, document["metadata"])
+        except Exception as exc:
+            print(f"Failed to read from {zim_fpath}, skiping ({exc})")
+            if debug:
+                traceback.print_exception(exc)
+            continue
+        discovered.setdefault(package["ident"], package)
+
+    packages = []
+    known_idents = set()
+    for package in conf_packages:
+        ident = package.get("ident", "")
+        known_idents.add(ident)
+        # NOTE(review): assumes ZIM packages in YAML carry `kind: zim`, as the
+        # entries written by get_entry_for do -- confirm for hand-written ones.
+        # Packages of any other kind are never touched.
+        if package.get("kind") == "zim":
+            if ident in discovered:
+                package.pop("disabled", None)
+            else:
+                package["disabled"] = True
+        # YAML definition always wins over the from-ZIM one
+        packages.append(package)
+
+    # from-ZIM definition for ZIMs the YAML did not know about
+    packages.extend(
+        package for ident, package in discovered.items() if ident not in known_idents
+    )
+    document["packages"] = packages
+
+    try:
+        packages_path.write_text(yaml.dump(document, Dumper=Dumper))
+    except Exception as exc:
+        print("[CRITICAL] unable to update(save) home YAML document, skiping")
+        traceback.print_exception(exc)
+        return
+
+
+if __name__ == "__main__":
+    # refresh is skipped when opted-out or when there is no folder to scan
+    if not dont_update and zims_dir.exists() and zims_dir.is_dir():
+        refresh_zims(
+            packages_path=packages_path,
+            zims_dir=zims_dir,
+            debug=bool(os.getenv("DEBUG", False)),
+        )
+    else:
+        print(f"Not refreshing ZIMs for {dont_update=}, {zims_dir=}")