Skip to content

Commit

Permalink
[dashboard] Added Support for ZIM-discovery
Browse files Browse the repository at this point in the history
On start, a new script is called to list the ZIM files in the expected location
and update the Packages YAML accordingly.

Then the gen-home script is called to generate home from (updated) Packages YAML.

A healthcheck is also added to signal to the outside that the home is ready, so that other services
depending on the updated Packages YAML can start.
  • Loading branch information
rgaudin committed Dec 21, 2023
1 parent a990ec6 commit bde2e3a
Show file tree
Hide file tree
Showing 4 changed files with 212 additions and 23 deletions.
28 changes: 23 additions & 5 deletions dashboard/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,27 +35,45 @@ RUN \
&& /usr/local/bin/gen-home_env/bin/pip3 install --no-cache-dir -U pip \
&& /usr/local/bin/gen-home_env/bin/pip3 install \
--no-cache-dir \
Jinja2==3.1.2 PyYAML==6.0.1 humanfriendly==10.0 \
Jinja2==3.1.2 PyYAML==6.0.1 humanfriendly==10.0 libzim==3.4.0 \
&& apk del curl

ENV FQDN "generic.hotspot"
ENV NAME "My Hotspot"
# path in which to find code (templates)
ENV SRC_DIR "/src"
# path to packages YAML file
ENV PACKAGES_PATH "/src/home.yaml"
# path to write home HTML and assets file
ENV DEST_DIR "/var/www"
# folder storing ZIM files. Unless DONT_UPDATE_PACKAGES is set, ZIM packages not in the folder
# will be removed (disabled) from packages.yaml
# and discovered ZIMs (not in YAML) will be added
ENV ZIM_DIR "/data/zims"
# set to skip packages.yaml update on start (reading ZIM_DIR folder)
ENV DONT_UPDATE_PACKAGES ""

# templates to write ZIM Package links to reader and ZIM downloads.
# Available patterns (to be replaced): `{fqdn}`, `{zim_name}`, `{zim_filename}`
ENV KIWIX_READER_LINK_TPL "//kiwix.{fqdn}/viewer#{zim_name}"
ENV KIWIX_DOWNLOAD_LINK_TPL "//zim-download.{fqdn}/{zim_filename}"

# WARN: this breaks apk but saves a lot of space
# it's OK on prod but comment it during dev if you need packages
RUN apk del apk-tools ca-certificates-bundle

COPY Caddyfile /etc/caddy/
COPY gen-home.py /src/
COPY gen-home.py refresh-zims.py /src/
COPY templates /src/templates
COPY assets /var/www/assets
COPY fallback.html /var/www/fallback.html
COPY home.yaml /src/
COPY lighttpd.conf /etc/lighttpd/
COPY entrypoint.sh /usr/local/bin/

# store python bytecode in image
RUN /usr/local/bin/gen-home_env/bin/python3 -m compileall /src/gen-home.py && mv /src/__pycache__/*.pyc /usr/local/bin/gen-home_env/lib/
RUN /usr/local/bin/gen-home_env/bin/python3 -m compileall /src/gen-home.py /src/refresh-zims.py && mv /src/__pycache__/*.pyc /usr/local/bin/gen-home_env/lib/

ENTRYPOINT ["/usr/bin/dumb-init", "--", "/usr/local/bin/gen-home_env/bin/python3", "/usr/local/bin/gen-home_env/lib/gen-home.cpython-311.pyc"]
HEALTHCHECK --interval=10s --timeout=2s CMD ["/bin/ls", "/tmp/.ready"]
ENTRYPOINT ["/usr/bin/dumb-init", "--", "/usr/local/bin/entrypoint.sh"]
CMD ["lighttpd", "-D", "-f", "/etc/lighttpd/lighttpd.conf"]

14 changes: 14 additions & 0 deletions dashboard/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/sh
# Container entrypoint: refresh the Packages YAML from on-disk ZIM files,
# generate the static homepage, flag the container as ready, then hand over
# to the requested command.
# Any failing step aborts the startup (set -e).
set -e

VENV=/usr/local/bin/gen-home_env
READY_FILE=/tmp/.ready

# a restarted container keeps its filesystem: remove the marker a previous run
# may have left so the healthcheck does not report "ready" while the
# collection and homepage are being regenerated
rm -f "$READY_FILE"

# refresh ZIM packages collection (maybe)
"$VENV/bin/python3" "$VENV/lib/refresh-zims.cpython-311.pyc"

# generate homepage from collection
"$VENV/bin/python3" "$VENV/lib/gen-home.cpython-311.pyc"

# readiness marker looked-up by the image's HEALTHCHECK
touch "$READY_FILE"

# replace this shell with the command passed as arguments
exec "$@"
43 changes: 25 additions & 18 deletions dashboard/gen-home.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,53 @@
#!/usr/bin/env python3

""" Entrypoint-friendly static homepage generation script
""" gen-home: generate static home page from Packages YAML
- Reads Packages YAML from `PACKAGES_PATH`
- Prepares an HTML output with templates defined in `SRC_DIR`/templates
- Writes an index.html in `DEST_DIR`
- Optionally (`DEBUG`) outputs index to stdout as well
Dependencies:
- PyYAML
- Jinja2
- humanfriendly
"""

import os
import pathlib
import re
import sys
import traceback
import urllib.parse
from typing import List, Union

import humanfriendly
from jinja2 import Environment, FileSystemLoader, select_autoescape
from yaml import load as yaml_load
import yaml

try:
from yaml import CLoader as Loader
from yaml import CSafeLoader as SafeLoader
except ImportError:
from yaml import Loader
# we don't NEED cython ext but it's faster so use it if avail.
from yaml import SafeLoader


src_dir = pathlib.Path(os.getenv("SRC_DIR", "/src"))
dest_dir = pathlib.Path(os.getenv("DEST_DIR", "/var/www"))
src_dir = pathlib.Path(os.getenv("SRC_DIR", "/src")).expanduser().resolve()
packages_path = (
pathlib.Path(os.getenv("PACKAGES_PATH", "home.yaml")).expanduser().resolve()
)
dest_dir = pathlib.Path(os.getenv("DEST_DIR", "/var/www")).expanduser().resolve()
templates_dir = src_dir.joinpath("templates")
env = Environment(
loader=FileSystemLoader(templates_dir), autoescape=select_autoescape()
)


def format_fsize(size: Union[str, int]) -> str:
def format_fsize(size: str | int) -> str:
if not str(size).isdigit():
size = humanfriendly.parse_size(size)
size = humanfriendly.parse_size(str(size))
try:
return humanfriendly.format_size(int(size), keep_width=False, binary=True)
except Exception:
return size
return str(size)


env.filters["fsize"] = format_fsize
Expand Down Expand Up @@ -95,19 +104,21 @@ def normalize(url: str) -> str:

@property
def visible(self):
if self.get("disabled", False):
return False
try:
return all([self[key] for key in self.MANDATORY_FIELDS])
except KeyError:
return False

@property
def langs(self) -> List[str]:
def langs(self) -> list[str]:
return [lang[:2] for lang in self.get("languages", [])]


def gen_home(fpath: pathlib.Path):
try:
document = yaml_load(fpath.read_text(), Loader=Loader)
document = yaml.load(fpath.read_text(), Loader=SafeLoader)
except Exception as exc:
print("[CRITICAL] unable to read home YAML document, using fallback homepage")
traceback.print_exception(exc)
Expand All @@ -131,12 +142,8 @@ def gen_home(fpath: pathlib.Path):


if __name__ == "__main__":
gen_home(src_dir / "home.yaml")
gen_home(packages_path)

if Conf.debug:
with open(dest_dir / "index.html", "r") as fh:
print(fh.read())

if len(sys.argv) < 2:
sys.exit(0)
os.execvp(sys.argv[1], sys.argv[1:]) # nosec
150 changes: 150 additions & 0 deletions dashboard/refresh-zims.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#!/usr/bin/env python3

""" refresh-zims: update YAML packages list using disk-discovered ZIM files
This is an optional feature, yet it is enabled by default.
Set `DONT_UPDATE_PACKAGES` environ to disable
Once enabled, this loops over ZIM files present on disk (at `ZIM_DIR` path)
and updates the Packages YAML (at `PACKAGES_PATH`) accordingly:
- if the package (using its ident) was in YAML: keep YAML entry
- if the ZIM was not in YAML: add entry using in-ZIM info
- entries in YAML for which ZIM is missing are disabled but kept.
Dependencies:
- PyYAML
- libzim
"""
import base64
import os
import pathlib
import traceback
from typing import Any

import yaml
from libzim.reader import Archive

try:
from yaml import CDumper as Dumper
from yaml import CSafeLoader as SafeLoader
except ImportError:
# we don't NEED cython ext but it's faster so use it if avail.
from yaml import Dumper, SafeLoader

# Runtime configuration, read once from the environment at import time.
# NOTE(review): the fallbacks used here for ZIM_DIR and both link templates
# differ from the values the Dockerfile sets for the same variables — confirm
# which ones are intended for out-of-container use.

# absolute path (with `~` expanded) of the Packages YAML to update
packages_path = pathlib.Path(os.environ.get("PACKAGES_PATH", "home.yaml"))
packages_path = packages_path.expanduser().resolve()

# folder in which ZIM files are looked for
zims_dir = pathlib.Path(os.environ.get("ZIM_DIR", "/src"))

# any non-empty value turns the refresh off
dont_update = os.environ.get("DONT_UPDATE_PACKAGES", "") != ""

# link templates for discovered ZIMs; placeholders are substituted
# by get_kiwix_url()
kiwix_reader_link_tpl = os.environ.get(
    "KIWIX_READER_LINK_TPL", "//kiwix.{fqdn}/{zim_name}"
)
kiwix_download_link_tpl = os.environ.get(
    "KIWIX_DOWNLOAD_LINK_TPL", "//kiwixdl.{fqdn}/{zim_filename}"
)


def get_metadata(archive: Archive, name: str) -> str:
    """Return the UTF-8 decoded value of the `name` metadata of `archive`.

    An empty string is returned when `name` is not listed in the
    archive's metadata keys.
    """
    if name in archive.metadata_keys:
        return archive.get_metadata(name).decode("UTF-8")
    return ""


def get_kiwix_url(template: str, fqdn: str, name: str, filename: str) -> str:
    """Fill a Kiwix link template with the values of a ZIM.

    The `{fqdn}`, `{zim_name}` and `{zim_filename}` placeholders of `template`
    are substituted, in that order, with `fqdn`, `name` and `filename`.
    Values are inserted verbatim: no URL-encoding is applied.
    """
    substitutions = (
        ("{fqdn}", fqdn),
        ("{zim_name}", name),
        ("{zim_filename}", filename),
    )
    url = template
    for placeholder, value in substitutions:
        url = url.replace(placeholder, value)
    return url


def _split_metadata_list(value: str, separator: str) -> list[str]:
    """Split a `separator`-delimited metadata value, dropping blank items."""
    # "".split(sep) is [""] (truthy), not []: filtering blanks is what makes
    # a missing metadata produce an empty list
    return [item.strip() for item in value.split(separator) if item.strip()]


def get_entry_for(
    fpath: pathlib.Path, document_metadata: dict[str, str]
) -> dict[str, Any]:
    """Build a Packages YAML entry from the ZIM file at `fpath`.

    Parameters:
        fpath: path to the ZIM file to read.
        document_metadata: `metadata` mapping of the Packages YAML document.
            Its `fqdn` value is used to build the links.

    Returns:
        A `kind: zim` package entry:
        - `ident` is `Publisher:Name:Flavour` (from in-ZIM metadata)
        - `languages` falls back to `["eng"]` when the ZIM declares none
        - `tags` is an empty list when the ZIM declares none
        - `url` and `download.url` are built from the link templates
        - `download.size` is the size of the file on disk
        - `icon` is the base64-encoded 48px illustration, or None if the
          ZIM has none at that size

    Raises:
        KeyError: if `document_metadata` has no `fqdn`.
        Any exception raised while opening or reading the ZIM is propagated.
    """
    zim = Archive(fpath)
    publisher = get_metadata(zim, "Publisher")
    name = get_metadata(zim, "Name")
    flavour = get_metadata(zim, "Flavour")
    fqdn = document_metadata["fqdn"]

    icon = None
    # has_illustration is a method: it must be called (with the wanted size)
    if zim.has_illustration(48):
        icon = base64.b64encode(bytes(zim.get_illustration_item(48).content)).decode(
            "ASCII"
        )

    return {
        "kind": "zim",
        "ident": f"{publisher}:{name}:{flavour}",
        "title": get_metadata(zim, "Title"),
        "description": get_metadata(zim, "Description"),
        "languages": _split_metadata_list(get_metadata(zim, "Language"), ",")
        or ["eng"],
        "tags": _split_metadata_list(get_metadata(zim, "Tags"), ";"),
        "url": get_kiwix_url(
            template=kiwix_reader_link_tpl,
            fqdn=fqdn,
            name=name,
            filename=fpath.name,
        ),
        "download": {
            "url": get_kiwix_url(
                template=kiwix_download_link_tpl,
                fqdn=fqdn,
                name=name,
                filename=fpath.name,
            ),
            "size": fpath.stat().st_size,
        },
        "icon": icon,
    }


def refresh_zims(
    packages_path: pathlib.Path, zims_dir: pathlib.Path, debug: bool | None = False
):
    """Synchronize the Packages YAML with the ZIM files found on disk.

    - entries of the YAML whose ident matches a ZIM on disk are kept as defined
      in the YAML (a `disabled` flag left by a previous refresh is cleared)
    - `kind: zim` entries of the YAML without a matching ZIM are kept
      but flagged `disabled: true`
    - all other entries of the YAML are kept untouched
    - ZIMs on disk that are not in the YAML are appended, using in-ZIM info

    Parameters:
        packages_path: path to the Packages YAML file, updated in place.
        zims_dir: folder in which `*.zim` files are looked for (not recursive).
        debug: when truthy, print the traceback of ZIM reading failures.

    Nothing is raised: failures to read or write the YAML are printed
    and the refresh is abandoned; unreadable ZIMs are skipped.
    """
    print(f"refreshing ZIMs from {zims_dir=}")
    try:
        document = yaml.load(packages_path.read_text(), Loader=SafeLoader)
        # make sure the keys we rely on are present
        document["packages"]
        document["metadata"]
        document["metadata"]["fqdn"]
        document["metadata"]["name"]
    except Exception as exc:
        print("[CRITICAL] unable to read home YAML document, skiping")
        traceback.print_exception(exc)
        return

    # an empty `packages:` key is loaded as None
    conf_packages = document["packages"] or []

    # entries built from on-disk ZIMs, by ident (sorted for a stable output)
    discovered: dict[str, dict[str, Any]] = {}
    for zim_fpath in sorted(zims_dir.glob("*.zim")):
        try:
            package = get_entry_for(zim_fpath, document["metadata"])
        except Exception as exc:
            print(f"Failed to read from {zim_fpath}, skiping ({exc})")
            if debug:
                traceback.print_exception(exc)
            continue
        discovered.setdefault(package["ident"], package)

    packages = []
    known_idents = set()
    for package in conf_packages:
        is_mapping = isinstance(package, dict)
        ident = package.get("ident", "") if is_mapping else None
        if is_mapping and ident in discovered:
            # reuse package definition from YAML; ZIM is present so a previous
            # refresh's `disabled` flag must not stick
            # NOTE(review): this also re-enables an entry disabled by hand
            known_idents.add(ident)
            package.pop("disabled", None)
        elif is_mapping and package.get("kind") == "zim":
            # ZIM is gone: keep the definition but hide it from the home
            package["disabled"] = True
        # anything else (non-ZIM packages) is not ours to alter
        packages.append(package)

    # use from-zim package definition for ZIMs unknown to the YAML
    packages.extend(
        package for ident, package in discovered.items() if ident not in known_idents
    )
    document["packages"] = packages

    try:
        packages_path.write_text(yaml.dump(document, Dumper=Dumper))
    except Exception as exc:
        print("[CRITICAL] unable to update(save) home YAML document, skiping")
        traceback.print_exception(exc)
        return


if __name__ == "__main__":
    # refresh only when not opted-out and the ZIM folder is an actual directory
    zims_dir_usable = zims_dir.exists() and zims_dir.is_dir()
    if dont_update or not zims_dir_usable:
        print(f"Not refreshing ZIMs for {dont_update=}, {zims_dir=}")
    else:
        # any non-empty DEBUG environ value enables debug
        refresh_zims(
            packages_path=packages_path,
            zims_dir=zims_dir,
            debug=bool(os.getenv("DEBUG", False)),
        )

0 comments on commit bde2e3a

Please sign in to comment.