Skip to content

Commit

Permalink
structured logging (#93)
Browse files Browse the repository at this point in the history
## Description
Setting up the project to emit structured (JSON) logs in production so
it's easier to index and query in Splunk. Also adding a request
correlation id to each request, so we can group together related
logging messages.

## Related Issues
closes #84 

## Additional Notes
- adding new default logging configurations for development and
production (the latter includes correlation ids and outputs logs as
JSON)
- adding the LOG_CONFIG env variable for selecting the logging
configuration to use
- adding middleware to append a correlation id to the request (this is
so we can group logs together based on the same request) and another for
writing a custom access log
- removing the `recordlinker.linkage` module, as it's no longer being
used
  • Loading branch information
ericbuckley authored Oct 25, 2024
1 parent c85f555 commit 0000ec5
Show file tree
Hide file tree
Showing 17 changed files with 390 additions and 144 deletions.
11 changes: 6 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ ENV USE_OTEL=${USE_OTEL}
# Set the USE_MSSQL env variable to true to enable SQL Server support
ARG USE_MSSQL=true
ENV USE_MSSQL=${USE_MSSQL}
# Set default log config
ARG LOG_CONFIG=/code/assets/production_log_config.json
ENV LOG_CONFIG=${LOG_CONFIG}

# Upgrade system packages and install curl
RUN apt-get update && apt-get upgrade -y && apt-get install curl -y
Expand All @@ -27,7 +30,7 @@ RUN mkdir -p /code/src/recordlinker
# Copy over just the pyproject.toml file and install the dependencies doing this
# before copying the rest of the code allows for caching of the dependencies
COPY ./pyproject.toml /code/pyproject.toml
RUN pip install '.'
RUN pip install '.[prod]'

# Conditionally install OpenTelemetry packages if USE_OTEL is true
RUN if [ "$USE_OTEL" = "true" ]; then \
Expand All @@ -46,9 +49,7 @@ EXPOSE 8080
# Conditionally run the application with or without OpenTelemetry
CMD if [ "$USE_OTEL" = "true" ]; then \
opentelemetry-instrument --service_name recordlinker \
uvicorn recordlinker.main:app --app-dir src --host 0 --port 8080 \
--log-config src/recordlinker/log_config.yml; \
uvicorn recordlinker.main:app --host 0 --port 8080; \
else \
uvicorn recordlinker.main:app --app-dir src --host 0 --port 8080 \
--log-config src/recordlinker/log_config.yml; \
uvicorn recordlinker.main:app --host 0 --port 8080; \
fi
70 changes: 70 additions & 0 deletions assets/production_log_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"version": 1,
"disable_existing_loggers": false,
"filters": {
"correlation_id": {
"()": "asgi_correlation_id.CorrelationIdFilter",
"default_value": "-"
},
"dict_values": {
"()": "recordlinker.log.DictArgFilter"
}
},
"formatters": {
"default": {
"()": "recordlinker.log.JSONFormatter",
"format": "%(levelname)s %(name)s %(message)s %(correlation_id)s",
"timestamp": true
},
"access": {
"()": "recordlinker.log.JSONFormatter",
"fmt": "%(message)s",
"static_fields": {"message": "ACCESS"}
}
},
"handlers": {
"console": {
"formatter": "default",
"class": "logging.StreamHandler",
"filters": ["correlation_id"],
"stream": "ext://sys.stderr"
},
"access": {
"formatter": "access",
"class": "logging.StreamHandler",
"filters": ["dict_values"],
"stream": "ext://sys.stdout"
}
},
"loggers": {
"": {
"handlers": ["console"],
"level": "WARNING"
},
"uvicorn": {
"handlers": ["console"],
"level": "INFO",
"propagate": false
},
"uvicorn.error": {
"handlers": ["console"],
"level": "INFO",
"propagate": false
},
"uvicorn.access": {
"handlers": ["console"],
"level": "CRITICAL",
"propagate": false
},
"recordlinker": {
"handlers": ["console"],
"level": "INFO",
"propagate": false
},
"recordlinker.access": {
"handlers": ["access"],
"level": "INFO",
"propagate": false
}
}
}
1 change: 1 addition & 0 deletions compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ services:
args:
USE_OTEL: ${USE_OTEL:-true}
USE_MSSQL: ${USE_MSSQL:-false}
LOG_CONFIG: ""
ports:
- "8080:8080"
environment:
Expand Down
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ dependencies = [
"rapidfuzz",
"opentelemetry-api",
"opentelemetry-sdk",
"python-json-logger",
"asgi-correlation-id",
# Database drivers
"psycopg2-binary", # PostgreSQL
"PyMySQL", # MySQL & MariaDB
Expand All @@ -37,6 +39,7 @@ dependencies = [

[project.optional-dependencies]
dev = [
# development-only dependencies here
"fastapi[standard]",
"pytest>=8.3",
"pytest-cov",
Expand All @@ -46,7 +49,7 @@ dev = [
"types-python-dateutil"
]
prod = [
# List any additional production-only dependencies here
# production-only dependencies here
]

[tool.setuptools]
Expand Down Expand Up @@ -94,5 +97,3 @@ env = [
[tool.mypy]
files = ["src"]
mypy_path = ["src"]
# TODO: Resolve static typing issues in main.py
exclude = ["src/recordlinker/main.py", "src/recordlinker/linkage"]
4 changes: 2 additions & 2 deletions scripts/local_server.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# Run the API server locally.
#
# Usage: scripts/local_service.sh <port>
# Usage: scripts/local_server.sh <port>
#
# <port> is the port on which to run the API server. If not specified, the server
# will run on port 8000.
Expand All @@ -14,4 +14,4 @@ cd "$(dirname "$0")/.."
PORT=${1:-8000}

# Start the API server
uvicorn recordlinker.main:app --app-dir src --reload --reload-dir src/ --host 0 --port ${PORT} --log-config src/recordlinker/log_config.yml
uvicorn recordlinker.main:app --reload --reload-dir src/ --port ${PORT}
4 changes: 3 additions & 1 deletion scripts/open_pulls.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ results=""
while IFS= read -r pr; do
number=$(echo "$pr" | jq -r '.number')
# get the timestamp of the ready_for_review event
ready_for_review=$(gh_api issues/${number}/timeline | jq -r 'map(select(.event == "ready_for_review")) | .[0].created_at')
# TODO: this call assumes a PR will have less than 100 timeline events,
# which is not always the case, we should handle pagination
ready_for_review=$(gh_api "issues/${number}/timeline?per_page=100" | jq -r 'map(select(.event == "ready_for_review")) | .[0].created_at')
# calculate the number of days since the PR was ready for review
# only calculate if ready_for_review does not equal "null"
if [ "$ready_for_review" == "null" ]; then
Expand Down
2 changes: 2 additions & 0 deletions src/recordlinker/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# initialize the configuration early
import recordlinker.config # noqa: F401
54 changes: 54 additions & 0 deletions src/recordlinker/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json
import logging.config
import typing

import pydantic
Expand Down Expand Up @@ -33,6 +35,10 @@ class Settings(pydantic_settings.BaseSettings):
"above the connection pool size",
default=10,
)
log_config: typing.Optional[str] = pydantic.Field(
description="The path to the logging configuration file",
default="",
)
initial_algorithms: str = pydantic.Field(
description=(
"The path to the initial algorithms file that is loaded on startup if the "
Expand All @@ -42,5 +48,53 @@ class Settings(pydantic_settings.BaseSettings):
default="assets/initial_algorithms.json",
)

def default_log_config(self) -> dict:
"""
Return the default logging configuration.
"""
return {
"version": 1,
"disable_existing_loggers": False,
"filters": {"key_value": {"()": "recordlinker.log.KeyValueFilter"}},
"formatters": {
"default": {
"()": "uvicorn.logging.DefaultFormatter",
"fmt": "%(levelprefix)s [%(asctime)s] ... %(message)s",
"datefmt": "%H:%M:%S",
}
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"formatter": "default",
"filters": ["key_value"],
"stream": "ext://sys.stderr",
}
},
"loggers": {
"": {"handlers": ["console"], "level": "WARNING"},
"recordlinker": {"handlers": ["console"], "level": "INFO", "propagate": False},
"recordlinker.access": {"handlers": ["console"], "level": "CRITICAL", "propagate": False},
},
}

def configure_logging(self) -> None:
"""
Configure logging based on the provided configuration file. If no configuration
file is provided, use the default configuration.
"""
config = None
if self.log_config:
# Load logging config from the provided file
try:
with open(self.log_config, "r") as fobj:
config = json.loads(fobj.read())
except Exception as exc:
raise ConfigurationError(
f"Error loading log configuration: {self.log_config}"
) from exc
logging.config.dictConfig(config or self.default_log_config())


settings = Settings() # type: ignore
settings.configure_logging()
Empty file.
97 changes: 0 additions & 97 deletions src/recordlinker/linkage/algorithms.py

This file was deleted.

44 changes: 44 additions & 0 deletions src/recordlinker/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import logging
import typing

import pythonjsonlogger.jsonlogger

RESERVED_ATTRS = pythonjsonlogger.jsonlogger.RESERVED_ATTRS + ("taskName",)


# Filter that promotes dict-style log arguments to record attributes so a
# downstream JSON formatter can emit them as top-level fields.
class DictArgFilter(logging.Filter):
    def filter(self, record):
        """
        Copy each key/value pair from a dict ``record.args`` onto the record.

        Records whose args are not a dict pass through untouched; every
        record is kept (always returns True).
        """
        args = record.args
        if isinstance(args, dict):
            for field, value in args.items():
                setattr(record, field, value)
        return True


class KeyValueFilter(logging.Filter):
    def filter(self, record):
        """
        Append every extra record attribute (anything not in
        RESERVED_ATTRS) to ``record.msg`` as ``key=value`` pairs.

        Always keeps the record (returns True).
        """
        # snapshot the extras first so we mutate msg outside the dict walk
        extras = [
            (key, value)
            for key, value in record.__dict__.items()
            if key not in RESERVED_ATTRS
        ]
        for key, value in extras:
            record.msg = f"{record.msg} {key}={value}"
        return True


class JSONFormatter(pythonjsonlogger.jsonlogger.JsonFormatter):
    """
    A custom JSON formatter that excludes the taskName field by default.
    """

    def __init__(
        self,
        *args: typing.Any,
        reserved_attrs: tuple[str, ...] = RESERVED_ATTRS,
        **kwargs: typing.Any,
    ):
        # Default reserved_attrs to the module-level RESERVED_ATTRS (the
        # upstream set plus "taskName") so those record attributes are not
        # emitted as JSON fields; callers may still pass their own tuple.
        super().__init__(*args, reserved_attrs=reserved_attrs, **kwargs)
Loading

0 comments on commit 0000ec5

Please sign in to comment.