Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(logging): Provide JSON-formatted structured logging as option #1308

67 changes: 53 additions & 14 deletions mlserver/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
from .settings import Settings

LoggerName = "mlserver"
LoggerFormat = "%(asctime)s [%(name)s]%(model)s %(levelname)s - %(message)s"

logger = logging.getLogger(LoggerName)

_STREAM_HANDLER_NAME = "stdout_stream_handler"


def get_logger():
return logger
Expand All @@ -37,37 +38,75 @@ def apply_logging_file(logging_settings: Union[str, Dict]):


class ModelLoggerFormatter(logging.Formatter):
"""
A logging formatter that uses context variables to inject
the model name and version in the log message.
"""
"""Log formatter incorporating model details, e.g. name and version."""

_UNSTRUCTURED_FORMAT = "%(asctime)s [%(name)s]%(model)s %(levelname)s - %(message)s"
_STRUCTURED_FORMAT = (
'{"time": "%(asctime)s", "name": "%(name)s", "level": "%(levelname)s", '
'"message": "%(message)s" %(model)s}'
)

def __init__(self, settings: Optional[Settings]):
self.use_structured_logging = (
settings is not None and settings.use_structured_logging
)
super().__init__(
self._STRUCTURED_FORMAT
if self.use_structured_logging
else self._UNSTRUCTURED_FORMAT
)

@staticmethod
def _format_unstructured_model_details(name: str, version: str) -> str:
if not name:
return ""
elif not version:
return f"[{name}]"
else:
return f"[{name}:{version}]"

@staticmethod
def _fmt_model(name: str, version: str) -> str:
def _format_structured_model_details(name: str, version: str) -> str:
if not name:
return ""
model_fmt = f"{name}:{version}" if version else name
return f"[{model_fmt}]"
model_details = f', "model_name": "{name}"'
if version:
model_details += f', "model_version": "{version}"'
return model_details

def format(self, record: logging.LogRecord) -> str:
model_name = model_name_var.get("")
model_version = model_version_var.get("")

record.model = self._fmt_model(model_name, model_version)
record.model = (
self._format_structured_model_details(model_name, model_version)
if self.use_structured_logging
else self._format_unstructured_model_details(model_name, model_version)
)

return super().format(record)


def _find_handler(
logger: logging.Logger, handler_name: str
) -> Optional[logging.Handler]:
for h in logger.handlers:
if h.get_name() == handler_name:
return h
return None


def configure_logger(settings: Optional[Settings] = None):
logger = get_logger()

# Don't add handler twice
if not logger.handlers:
stream_handler = StreamHandler(sys.stdout)
formatter = ModelLoggerFormatter(LoggerFormat)
stream_handler.setFormatter(formatter)
handler = _find_handler(logger, _STREAM_HANDLER_NAME)
if handler is None:
handler = StreamHandler(sys.stdout)
handler.set_name(_STREAM_HANDLER_NAME)
logger.addHandler(handler)

logger.addHandler(stream_handler)
handler.setFormatter(ModelLoggerFormatter(settings))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the CLI is used to run mlserver start ., first the CLI will configure a logger and then start the MLServer(settings) which will then again configure a logger but with settings. This is all within the same thread. In the previous version, the handler was initialised with the wrong formatter first and then when the MLServer class was configuring the logger, it wasn't setting a formatter because there was already a handler for the logger.


logger.setLevel(logging.INFO)
if settings and settings.debug:
Expand Down
2 changes: 2 additions & 0 deletions mlserver/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ class Config:
"""

# Logging settings
use_structured_logging: bool = False
"""Use JSON-formatted structured logging instead of default format."""
logging_settings: Optional[Union[str, Dict]] = None
"""Path to logging config file or dictionary configuration."""

Expand Down
138 changes: 127 additions & 11 deletions tests/test_logging.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,117 @@
import pytest
import json


from mlserver import ModelSettings
from mlserver.context import model_context
from mlserver.logging import (
ModelLoggerFormatter,
LoggerFormat,
configure_logger,
logger,
_STREAM_HANDLER_NAME,
)
from mlserver.settings import ModelParameters, Settings
from tests.fixtures import SumModel
from logging import INFO


@pytest.mark.parametrize(
"name, version, expected_fmt",
"name, version, expected_model_fmt, fmt_present_in_all",
[
(
"foo",
"v1.0",
"[foo:v1.0]",
False,
),
(
"foo",
"",
"[foo]",
False,
),
(
"",
"v1.0",
"",
True,
),
(
"",
"",
"",
True,
),
],
)
def test_model_logging_formatter_unstructured(
name: str,
version: str,
expected_model_fmt: str,
fmt_present_in_all: bool,
settings: Settings,
caplog,
):
settings.use_structured_logging = False
caplog.handler.setFormatter(ModelLoggerFormatter(settings))
caplog.set_level(INFO)

model_settings = ModelSettings(
name=name, implementation=SumModel, parameters=ModelParameters(version=version)
)

logger.info("Before model context")
with model_context(model_settings):
logger.info("Inside model context")
logger.info("After model context")

log_records = caplog.get_records("call")
assert len(log_records) == 3

assert all(hasattr(lr, "model") for lr in log_records)

if fmt_present_in_all:
assert all(lr.model == expected_model_fmt for lr in log_records)
else:
assert expected_model_fmt != log_records[0].model
assert expected_model_fmt == log_records[1].model
assert expected_model_fmt != log_records[2].model


@pytest.mark.parametrize(
"name, version, expected_model_fmt, fmt_present_in_all",
[
(
"foo",
"v1.0",
', "model_name": "foo", "model_version": "v1.0"',
False,
),
(
"foo",
None,
"[foo]",
"",
', "model_name": "foo"',
False,
),
(
"",
"v1.0",
"",
True,
),
("", "", "", True),
],
)
def test_model_logging_formatter(name, version, expected_fmt, caplog):
caplog.handler.setFormatter(ModelLoggerFormatter(LoggerFormat))
def test_model_logging_formatter_structured(
name: str,
version: str,
expected_model_fmt: str,
fmt_present_in_all: bool,
settings: Settings,
caplog,
):
settings.use_structured_logging = True
caplog.handler.setFormatter(ModelLoggerFormatter(settings))
caplog.set_level(INFO)

model_settings = ModelSettings(
Expand All @@ -46,12 +123,18 @@ def test_model_logging_formatter(name, version, expected_fmt, caplog):
logger.info("Inside model context")
logger.info("After model context")

log_records = caplog.text.strip().split("\n")
assert len(caplog.messages) == 3
_ = [json.loads(lr) for lr in caplog.text.strip().split("\n")]
log_records = caplog.get_records("call")
assert len(log_records) == 3

assert all(hasattr(lr, "model") for lr in log_records)

assert expected_fmt not in log_records[0]
assert expected_fmt in log_records[1]
assert expected_fmt not in log_records[2]
if fmt_present_in_all:
assert all(lr.model == expected_model_fmt for lr in log_records)
else:
assert expected_model_fmt != log_records[0].model
assert expected_model_fmt == log_records[1].model
assert expected_model_fmt != log_records[2].model


@pytest.mark.parametrize("debug", [True, False])
Expand All @@ -66,3 +149,36 @@ def test_log_level_gets_persisted(debug: bool, settings: Settings, caplog):
assert test_log_message in caplog.text
else:
assert test_log_message not in caplog.text


def test_configure_logger_when_called_multiple_times_with_same_logger(settings):
logger = configure_logger()

assert len(logger.handlers) == 1
handler = logger.handlers[0]
assert handler.name == _STREAM_HANDLER_NAME
assert (
hasattr(handler.formatter, "use_structured_logging")
and handler.formatter.use_structured_logging is False
)

logger = configure_logger(settings)

assert len(logger.handlers) == 1
handler = logger.handlers[0]
assert handler.name == _STREAM_HANDLER_NAME
assert (
hasattr(handler.formatter, "use_structured_logging")
and handler.formatter.use_structured_logging is False
)

settings.use_structured_logging = True
logger = configure_logger(settings)

assert len(logger.handlers) == 1
handler = logger.handlers[0]
assert handler.name == _STREAM_HANDLER_NAME
assert (
hasattr(handler.formatter, "use_structured_logging")
and handler.formatter.use_structured_logging is True
)