diff --git a/.docker_env b/.docker_env
index eb1f2a5..8f9d01a 100644
--- a/.docker_env
+++ b/.docker_env
@@ -13,8 +13,6 @@ ADMIN_PASS=admin
 # DATA_DIR=/home/nilsnolde/dev/gis-ops/routing-graph-packager/data
 
 VALHALLA_URL="http://app"
-VALHALLA_IMAGE=gisops/valhalla:latest
-
 POSTGRES_DB=gis
 POSTGRES_USER=admin
 POSTGRES_PASS=admin
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 50ea39d..d1fd96d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,12 +1,7 @@
 repos:
-  - repo: https://github.com/ambv/black
-    rev: 23.9.1
-    hooks:
-      - id: black
-        language_version: python3
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.0.289
+    rev: v0.7.4
     hooks:
       - id: ruff
         args: [--fix]
diff --git a/Dockerfile b/Dockerfile
index b238c99..7e3a133 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,7 @@
 #--- BEGIN Usual Python stuff ---
 
-FROM ghcr.io/valhalla/valhalla:latest as builder
-LABEL maintainer=nils@gis-ops.com
+FROM ghcr.io/valhalla/valhalla:latest AS builder
+LABEL maintainer="Nils Nolde <nils@gis-ops.com>"
 
 WORKDIR /app
 
@@ -15,7 +15,7 @@ RUN apt-get update -y > /dev/null && \
     python3-venv \
     curl > /dev/null
 
-ENV POETRY_BIN /root/.local/bin/poetry
+ENV POETRY_BIN=/root/.local/bin/poetry
 
 RUN curl -sSL https://install.python-poetry.org | python && \
     $POETRY_BIN config virtualenvs.create false && \
@@ -46,8 +46,8 @@ RUN cd /usr/local/bin && \
     for f in valhalla*; do rm $f; done && \
     cd .. && mv $preserve ./bin
 
-FROM ubuntu:24.04 as runner_base
-MAINTAINER Nils Nolde <nils@gis-ops.com>
+FROM ubuntu:24.04 AS runner_base
+LABEL maintainer="Nils Nolde <nils@gis-ops.com>"
 
 # install Valhalla stuff
 RUN apt-get update > /dev/null && \
diff --git a/README.md b/README.md
index a1b6c39..d0b53b2 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,7 @@ The default road dataset is the [OSM](openstreetmap.org) planet PBF. If availabl
 - **data updater**: includes a daily OSM updater
 - **asynchronous API**: graph generation is outsourced to a [`ARQ`](https://github.com/samuelcolvin/arq) worker
 - **email notifications**: notifies the requesting user if the job succeeded/failed
+- **logs API**: read the logs for the worker, the app and the graph builder via the API
 
 ## "Quick Start"
 
@@ -87,3 +88,13 @@ The app is listening on `/api/v1/jobs` for new `POST` requests to generate some
   - Zip graph tiles from disk according to the request's bounding box and put the package to `$DATA_DIR/output/`, along with a metadata JSON
   - **busy**, the current job will be put in the queue and will be processed once it reaches the queue's head
 4. Send an email to the requesting user with success or failure notice (including the error message)
+
+### Logs
+
+The app exposes logs via the route `/api/v1/logs/{log_type}`. Available log types are `worker`, `app` and `builder`. An optional query parameter `?lines={n}` limits the output to the last `n` lines. Authentication is required.
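+
+For example, to read the last 100 lines of the worker log (assuming the default admin credentials and the app listening on `localhost:5000`):
+
+```sh
+curl -u admin@example.org:admin "http://localhost:5000/api/v1/logs/worker?lines=100"
+```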
diff --git a/cli.py b/cli.py
index cc5c4d7..e5660c2 100644
--- a/cli.py
+++ b/cli.py
@@ -15,7 +15,7 @@
 from routing_packager_app import SETTINGS
 from routing_packager_app.db import get_db
 from routing_packager_app.api_v1.models import Job, User
-from routing_packager_app.logger import AppSmtpHandler, get_smtp_details
+from routing_packager_app.logger import AppSmtpHandler, get_smtp_details, LOGGER
 from routing_packager_app.utils.geom_utils import wkbe_to_geom, wkbe_to_str
 
 JOB_TIMEOUT = 60 * 60  # one hour to compress a single graph
@@ -23,9 +23,6 @@ description = "Runs the worker to update the ZIP packages."
 parser = ArgumentParser(description=description)
 
-# set up the logger basics
-LOGGER = logging.getLogger("packager")
-
 
 
 def _sort_jobs(jobs_: List[Job]):
     out = list()
diff --git a/conf/valhalla.conf b/conf/valhalla.conf
index 04fa27d..44e9081 100644
--- a/conf/valhalla.conf
+++ b/conf/valhalla.conf
@@ -12,8 +12,8 @@ autorestart=false
 redirect_stderr=true
 # Either log to file inside the container or
 # log to PID 1 (gunicorn in this case) so docker logs will show it
-# stdout_logfile=/var/log/build_loop.log
-# stdout_logfile_maxbytes=1MB
-stdout_logfile=/proc/1/fd/1
-stdout_logfile_maxbytes=0
+stdout_logfile=%(ENV_TMP_DATA_DIR)s/logs/builder.log
+stdout_logfile_maxbytes=1MB
+# stdout_logfile=/proc/1/fd/1
+# stdout_logfile_maxbytes=0
 environment=CONCURRENCY="4",DATA_DIR="/app/data",TMP_DATA_DIR="/app/tmp_data"
diff --git a/gunicorn.py b/gunicorn.py
index 1531fc7..0756ed0 100644
--- a/gunicorn.py
+++ b/gunicorn.py
@@ -1,3 +1,6 @@
+from routing_packager_app.logger import LOGGING_CONFIG
+
 bind = "0.0.0.0:5000"
 workers = 1
 worker_class = "uvicorn.workers.UvicornWorker"
+logconfig_dict = LOGGING_CONFIG
diff --git a/main.py b/main.py
index 892f978..d1885ef 100644
--- a/main.py
+++ b/main.py
@@ -24,7 +24,8 @@ async def lifespan(app: FastAPI):
         p.mkdir(exist_ok=True)
     SETTINGS.get_output_path().mkdir(exist_ok=True)
     yield
-    app.state.redis_pool.shutdown()
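+    # shutdown() is a coroutine, so it has to be awaited to actually run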
+    await app.state.redis_pool.shutdown()
 
 
 app: FastAPI = create_app(lifespan=lifespan)
diff --git a/pyproject.toml b/pyproject.toml
index ea22bbd..0451ecc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,24 +44,19 @@ coveralls = "^3.3.1"
 requires = ["poetry>=1.0.0"]
 build-backend = "poetry.masonry.api"
 
-[tool.black]
-line-length = 105
-exclude = '''
-/(
-    \.git
-  | \.venv
-  | dist
-  | build
-)/
-'''
-
 [tool.ruff]
+
+extend-exclude = [".venv", "third_party", "build"]
+lint.preview = true
+format.preview = true
+
 # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
-select = ["E", "F"]
-ignore = []
+lint.select = ["E", "F", "RUF022"]
+lint.ignore = []
+line-length = 105
 
 # Allow autofix for all enabled rules (when `--fix`) is provided.
-fixable = [
+lint.fixable = [
     "A",
     "B",
     "C",
@@ -106,12 +101,6 @@ fixable = [
     "TRY",
     "UP",
     "YTT",
+    "RUF022",
 ]
-unfixable = []
-
-# Exclude a variety of commonly ignored directories.
-exclude = [".venv", "__pycache__", ".git"]
-
-# Same as Black.
-line-length = 105
-target-version = "py312"
+lint.unfixable = []
diff --git a/routing_packager_app/api_v1/__init__.py b/routing_packager_app/api_v1/__init__.py
index 5f669e0..24f234f 100644
--- a/routing_packager_app/api_v1/__init__.py
+++ b/routing_packager_app/api_v1/__init__.py
@@ -1,7 +1,8 @@
 from fastapi import APIRouter
 
-from .routes import users, jobs
+from .routes import jobs, logs, users
 
 api_v1_router = APIRouter()
 api_v1_router.include_router(jobs.router, prefix="/jobs", tags=["jobs"])
 api_v1_router.include_router(users.router, prefix="/users", tags=["users"])
+api_v1_router.include_router(logs.router, prefix="/logs", tags=["logs"])
diff --git a/routing_packager_app/api_v1/models.py b/routing_packager_app/api_v1/models.py
index 8c14281..bd9f8be 100644
--- a/routing_packager_app/api_v1/models.py
+++ b/routing_packager_app/api_v1/models.py
@@ -1,12 +1,13 @@
 from datetime import datetime
-from typing import Optional, List
+from enum import Enum
+from typing import List, Optional
 
 from fastapi.security import HTTPBasicCredentials
 from geoalchemy2 import Geography
 from pydantic import EmailStr
 from sqlalchemy import Column
 from sqlalchemy_utils import PasswordType
-from sqlmodel import SQLModel, Field, DateTime, Relationship, Session, select, AutoString
+from sqlmodel import AutoString, DateTime, Field, Relationship, Session, SQLModel, select
 
 from ..config import SETTINGS
 from ..constants import Providers, Statuses
@@ -108,3 +109,10 @@ def add_admin_user(session: Session):
         admin_user = User(email=admin_email, password=admin_pass)
         session.add(admin_user)
         session.commit()
+
+
+class LogType(str, Enum):
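+    # each value maps to a "<value>.log" file in SETTINGS.get_logging_dir()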
+    WORKER = "worker"
+    APP = "app"
+    BUILDER = "builder"
diff --git a/routing_packager_app/api_v1/routes/logs.py b/routing_packager_app/api_v1/routes/logs.py
new file mode 100644
index 0000000..640f18d
--- /dev/null
+++ b/routing_packager_app/api_v1/routes/logs.py
@@ -0,0 +1,48 @@
+from fastapi import APIRouter, Depends, HTTPException
+from fastapi.responses import PlainTextResponse
+from fastapi.security import HTTPBasicCredentials
+from sqlmodel import Session
+from starlette.status import (
+    HTTP_400_BAD_REQUEST,
+    HTTP_401_UNAUTHORIZED,
+)
+
+from ...auth.basic_auth import BasicAuth
+from ...config import SETTINGS
+from ...db import get_db
+from ..models import LogType, User
+
+router = APIRouter()
+
+
+@router.get("/{log_type}", response_class=PlainTextResponse)
+def get_logs(
+    log_type: LogType,
+    lines: int | None = None,
+    db: Session = Depends(get_db),
+    auth: HTTPBasicCredentials = Depends(BasicAuth),
+):
+    # first authenticate
+    req_user = User.get_user(db, auth)
+    if not req_user:
+        raise HTTPException(HTTP_401_UNAUTHORIZED, "Not authorized to read logs.")
+
+    # figure out the type of logs
+    log_file = SETTINGS.get_logging_dir() / f"{log_type.value}.log"
+
+    try:
+        with open(log_file) as fh:
+            if lines is None:
+                return fh.read()
+            # only return the last <lines> lines
+            line_count = sum(1 for _ in fh)
+            start_i = line_count - lines if line_count > lines else 0
+            response = ""
+            fh.seek(0)
+            for i, line in enumerate(fh):
+                if i < start_i:
+                    continue
+                response += line
+            return response
+    except OSError:
+        raise HTTPException(HTTP_400_BAD_REQUEST, f"Unable to open {log_file}.")
diff --git a/routing_packager_app/config.py b/routing_packager_app/config.py
index 0b8c97e..f8e63cb 100644
--- a/routing_packager_app/config.py
+++ b/routing_packager_app/config.py
@@ -18,7 +18,7 @@ class BaseSettings(_BaseSettings):
 
     DESCRIPTION_PATH: Path = BASE_DIR.joinpath("DESCRIPTION.md")
 
-    ### APP ###
+    # APP ###
     ADMIN_EMAIL: str = "admin@example.org"
     ADMIN_PASS: str = "admin"
     # TODO: clarify if there's a need to restrict origins
@@ -30,7 +30,7 @@ class BaseSettings(_BaseSettings):
 
     ENABLED_PROVIDERS: list[str] = list(CommaSeparatedStrings("osm"))
 
-    ### DATABASES ###
+    # DATABASES ###
     POSTGRES_HOST: str = "localhost"
     POSTGRES_PORT: int = 5432
     POSTGRES_DB: str = "gis"
@@ -38,7 +38,7 @@ class BaseSettings(_BaseSettings):
     POSTGRES_PASS: str = "admin"
     REDIS_URL: str = "redis://localhost"
 
-    ### SMTP ###
+    # SMTP ###
     SMTP_HOST: str = "localhost"
     SMTP_PORT: int = 1025
     SMTP_FROM: str = "valhalla@kadas.org"
@@ -79,6 +79,19 @@ def get_tmp_data_dir(self) -> Path:
 
         return tmp_data_dir
 
+    def get_logging_dir(self) -> Path:
+        """
+        Returns the directory where the worker, app and builder logs are stored.
+        """
+        tmp_data_dir = self.TMP_DATA_DIR
+        if os.path.isdir("/app") and not os.getenv("CI", None):  # pragma: no cover
+            tmp_data_dir = Path("/app/tmp_data")
+        log_dir = tmp_data_dir / "logs"
+
+        log_dir.mkdir(exist_ok=True)
+
+        return log_dir
+
 
 class ProdSettings(BaseSettings):
     model_config = SettingsConfigDict(case_sensitive=True, env_file=ENV_FILE, extra="ignore")
@@ -108,7 +121,6 @@ class TestSettings(BaseSettings):
 
 # decide which settings we'll use
 SETTINGS: Optional[BaseSettings] = None
-print("LOADING SETTINGS")
 env = os.getenv("API_CONFIG", "prod")
 if env == "prod":  # pragma: no cover
     SETTINGS = ProdSettings()
diff --git a/routing_packager_app/logger.py b/routing_packager_app/logger.py
index a50f0d9..f1eb146 100644
--- a/routing_packager_app/logger.py
+++ b/routing_packager_app/logger.py
@@ -1,5 +1,6 @@
 from logging.handlers import SMTPHandler
 import logging
+from logging import config
 from typing import List  # noqa: F401
 
 from .config import SETTINGS
@@ -58,3 +59,65 @@ def get_smtp_details(toaddrs: List[str]):
         conf["secure"] = tuple()
 
     return conf
+
+
+LOGGING_CONFIG = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "root": {
+        "level": "INFO",
+        "handlers": ["default"],
+    },
+    "loggers": {
+        "gunicorn.error": {
+            "level": "INFO",
+            "handlers": ["app"],
+            "propagate": True,
+            "qualname": "gunicorn.error",
+        },
+        "gunicorn.access": {
+            "level": "INFO",
+            "handlers": ["app"],
+            "propagate": True,
+            "qualname": "gunicorn.access",
+        },
+        "worker": {
+            "level": "INFO",
+            "handlers": ["worker"],
+            "propagate": True,
+            "qualname": "worker",
+        },
+    },
+    "handlers": {
+        "worker": {
+            "class": "logging.FileHandler",
+            "formatter": "worker",
+            "filename": str(SETTINGS.get_logging_dir() / "worker.log"),
+        },
+        "app": {
+            "class": "logging.FileHandler",
+            "formatter": "app",
+            "filename": str(SETTINGS.get_logging_dir() / "app.log"),
+        },
+        "default": {"class": "logging.StreamHandler", "formatter": "std", "stream": "ext://sys.stdout"},
+    },
+    "formatters": {
+        "worker": {
+            "format": "worker: %(asctime)s [%(process)d] [%(levelname)s] %(message)s",
+            "datefmt": "[%Y-%m-%d %H:%M:%S %z]",
+            "class": "logging.Formatter",
+        },
+        "app": {
+            "format": "app: %(asctime)s [%(process)d] [%(levelname)s] %(message)s",
+            "datefmt": "[%Y-%m-%d %H:%M:%S %z]",
+            "class": "logging.Formatter",
+        },
+        "std": {"format": "%(asctime)s [%(process)d] [%(levelname)s] %(message)s"},
+    },
+}
+
+config.dictConfig(LOGGING_CONFIG)
+LOGGER = logging.getLogger("worker")
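+
+# with the "worker" formatter above, a log record looks roughly like:
+# worker: [2024-11-20 10:15:02 +0000] [42] [INFO] processing job 1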
diff --git a/routing_packager_app/worker.py b/routing_packager_app/worker.py
index b9f844d..7bed36c 100644
--- a/routing_packager_app/worker.py
+++ b/routing_packager_app/worker.py
@@ -22,12 +22,10 @@
 from .db import get_db
 from .api_v1.models import User, Job
 from .constants import Statuses
-from .logger import AppSmtpHandler, get_smtp_details
+from .logger import AppSmtpHandler, get_smtp_details, LOGGER
 from .utils.file_utils import make_zip
 from .utils.valhalla_utils import get_tiles_with_bbox
 
-LOGGER = logging.getLogger("packager")
-
 
 async def create_package(
     ctx,
@@ -107,8 +105,12 @@ async def create_package(
     out_dir = SETTINGS.get_output_path()
     lock = out_dir.joinpath(".lock")
     lock.touch(exist_ok=False)
-    make_zip(tile_paths, current_valhalla_dir, zip_path)
-    lock.unlink(missing_ok=False)
+    try:
+        make_zip(tile_paths, current_valhalla_dir, zip_path)
+    except Exception as e:
+        LOGGER.error(e)
+    finally:
+        lock.unlink(missing_ok=False)
 
     # Create the meta JSON
     fname = os.path.basename(zip_path)
diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh
index 520f063..d30418c 100755
--- a/scripts/docker-entrypoint.sh
+++ b/scripts/docker-entrypoint.sh
@@ -16,6 +16,8 @@ elif [ "${cmd}" == 'app' ]; then
     echo "No SSL configured."
   fi
 
+  # make sure the log directory exists for supervisor to be able to log
+  mkdir -p /app/tmp_data/logs
   # Read the supervisor config and start the build loop
   service supervisor start
   supervisorctl start build_loop
diff --git a/scripts/run_valhalla.sh b/scripts/run_valhalla.sh
index e4640d8..06ad2bc 100755
--- a/scripts/run_valhalla.sh
+++ b/scripts/run_valhalla.sh
@@ -17,6 +17,11 @@ export LD_LIBRARY_PATH=/usr/local/lib
 export http_proxy=http://prxp01.admin.ch:8080
 export https_proxy=http://prxp01.admin.ch:8080
 
+log_message() {
+  echo "build_loop: $(date "+%Y-%m-%d %H:%M:%S") $1"
+}
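+# e.g. log_message "INFO: foo" prints "build_loop: 2024-11-20 10:15:02 INFO: foo"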
- echo "INFO: Started Valhalla the first time with config $valhalla_config on with PID $OLD_PID" + log_message "INFO: Started Valhalla the first time with config $valhalla_config on with PID $OLD_PID" sleep 10 echo "" echo "" continue fi - echo "INFO: Building initial graph with $PBF..." + log_message "INFO: Building initial graph with $PBF..." valhalla_build_tiles -c "${valhalla_config}" -s initialize -e build "$PBF" || exit 1 - echo "INFO: Downloading elevation to $ELEVATION_DIR..." - valhalla_build_elevation --from-tiles --decompress -c ${valhalla_config} -v || exit 1 + log_message "INFO: Downloading elevation to $ELEVATION_DIR..." + # valhalla_build_elevation --from-tiles --decompress -c ${valhalla_config} -v || exit 1 # debugging with andorra only: - # valhalla_build_elevation --decompress -c ${valhalla_config} -v -b 1,42,2,43 || exit 1 + valhalla_build_elevation --decompress -c ${valhalla_config} -v -b 1,42,2,43 || exit 1 - echo "INFO: Enhancing initial tiles with elevation..." + log_message "INFO: Enhancing initial tiles with elevation..." valhalla_build_tiles -c "${valhalla_config}" -s enhance -e cleanup "$PBF" || exit 1 # reset config so the service won't load the graph reset_config - echo "INFO: Updating the registered packages with $(which python3)" + log_message "INFO: Updating the registered packages with $(which python3)" python3 /app/cli.py # shut down the old service and launch the new one - echo "INFO: Killing Valhalla on port $OLD_PORT with PID $OLD_PID" + log_message "INFO: Killing Valhalla on port $OLD_PORT with PID $OLD_PID" kill -9 $OLD_PID exec valhalla_service "$valhalla_config" 1 & OLD_PID=$! - echo "INFO: Started Valhalla on port $CURRENT_PORT with PID $OLD_PID" + log_message "INFO: Started Valhalla on port $CURRENT_PORT with PID $OLD_PID" sleep 10 echo "" diff --git a/scripts/update_osm.sh b/scripts/update_osm.sh index 6b4e71c..8b4965e 100755 --- a/scripts/update_osm.sh +++ b/scripts/update_osm.sh @@ -20,6 +20,10 @@ usage() echo "usage: update_osm.sh --pbf/-p /app/data/osm/planet-latest.osm.pbf" } +log_message() { + echo "build_loop: $(date "+%Y-%m-%d %H:%M:%S") $1" +} + pbf=/app/tmp_data/osm/planet-latest.osm.pbf # Get the arguments @@ -37,7 +41,7 @@ while [ "$1" != "" ]; do shift done -echo "$(date "+%Y-%m-%d %H:%M:%S") Updating ${pbf} with the proxy settings: http_proxy: $http_proxy, https_proxy: $https_proxy" +log_message "Updating ${pbf} with the proxy settings: http_proxy: $http_proxy, https_proxy: $https_proxy" fn=$(basename "${pbf}") pbf_dir=$(dirname "$pbf") pbf_name_updated="updated_${fn}" diff --git a/tests/env b/tests/env index fbd7cec..82718ab 100644 --- a/tests/env +++ b/tests/env @@ -8,9 +8,6 @@ TMP_DATA_DIR=${PWD}/tests/tmp_data VALHALLA_SERVER_IP="http://localhost" -# The routers you'd like to enable -VALHALLA_IMAGE=gisops/valhalla:latest - POSTGRES_USER=admin POSTGRES_PASS=admin