diff --git a/mlserver/logging.py b/mlserver/logging.py index c19158da6..ca2ad9183 100644 --- a/mlserver/logging.py +++ b/mlserver/logging.py @@ -11,10 +11,11 @@ from .settings import Settings LoggerName = "mlserver" -LoggerFormat = "%(asctime)s [%(name)s]%(model)s %(levelname)s - %(message)s" logger = logging.getLogger(LoggerName) +_STREAM_HANDLER_NAME = "stdout_stream_handler" + def get_logger(): return logger @@ -37,37 +38,75 @@ def apply_logging_file(logging_settings: Union[str, Dict]): class ModelLoggerFormatter(logging.Formatter): - """ - A logging formatter that uses context variables to inject - the model name and version in the log message. - """ + """Log formatter incorporating model details, e.g. name and version.""" + + _UNSTRUCTURED_FORMAT = "%(asctime)s [%(name)s]%(model)s %(levelname)s - %(message)s" + _STRUCTURED_FORMAT = ( + '{"time": "%(asctime)s", "name": "%(name)s", "level": "%(levelname)s", ' + '"message": "%(message)s" %(model)s}' + ) + + def __init__(self, settings: Optional[Settings]): + self.use_structured_logging = ( + settings is not None and settings.use_structured_logging + ) + super().__init__( + self._STRUCTURED_FORMAT + if self.use_structured_logging + else self._UNSTRUCTURED_FORMAT + ) + + @staticmethod + def _format_unstructured_model_details(name: str, version: str) -> str: + if not name: + return "" + elif not version: + return f"[{name}]" + else: + return f"[{name}:{version}]" @staticmethod - def _fmt_model(name: str, version: str) -> str: + def _format_structured_model_details(name: str, version: str) -> str: if not name: return "" - model_fmt = f"{name}:{version}" if version else name - return f"[{model_fmt}]" + model_details = f', "model_name": "{name}"' + if version: + model_details += f', "model_version": "{version}"' + return model_details def format(self, record: logging.LogRecord) -> str: model_name = model_name_var.get("") model_version = model_version_var.get("") - record.model = self._fmt_model(model_name, model_version) + record.model = ( + self._format_structured_model_details(model_name, model_version) + if self.use_structured_logging + else self._format_unstructured_model_details(model_name, model_version) + ) return super().format(record) +def _find_handler( + logger: logging.Logger, handler_name: str +) -> Optional[logging.Handler]: + for h in logger.handlers: + if h.get_name() == handler_name: + return h + return None + + def configure_logger(settings: Optional[Settings] = None): logger = get_logger() # Don't add handler twice - if not logger.handlers: - stream_handler = StreamHandler(sys.stdout) - formatter = ModelLoggerFormatter(LoggerFormat) - stream_handler.setFormatter(formatter) + handler = _find_handler(logger, _STREAM_HANDLER_NAME) + if handler is None: + handler = StreamHandler(sys.stdout) + handler.set_name(_STREAM_HANDLER_NAME) + logger.addHandler(handler) - logger.addHandler(stream_handler) + handler.setFormatter(ModelLoggerFormatter(settings)) logger.setLevel(logging.INFO) if settings and settings.debug: diff --git a/mlserver/settings.py b/mlserver/settings.py index 41fc90fa8..4d631c5a8 100644 --- a/mlserver/settings.py +++ b/mlserver/settings.py @@ -221,6 +221,8 @@ class Config: """ # Logging settings + use_structured_logging: bool = False + """Use JSON-formatted structured logging instead of default format.""" logging_settings: Optional[Union[str, Dict]] = None """Path to logging config file or dictionary configuration.""" diff --git a/tests/test_logging.py b/tests/test_logging.py index f315dd96e..f39d97f4b 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -1,12 +1,14 @@ import pytest +import json + from mlserver import ModelSettings from mlserver.context import model_context from mlserver.logging import ( ModelLoggerFormatter, - LoggerFormat, configure_logger, logger, + _STREAM_HANDLER_NAME, ) from mlserver.settings import ModelParameters, Settings from tests.fixtures import SumModel @@ -14,27 +16,102 @@ @pytest.mark.parametrize( - "name, version, expected_fmt", + "name, version, expected_model_fmt, fmt_present_in_all", [ ( "foo", "v1.0", "[foo:v1.0]", + False, ), ( "foo", "", "[foo]", + False, + ), + ( + "", + "v1.0", + "", + True, + ), + ( + "", + "", + "", + True, + ), + ], +) +def test_model_logging_formatter_unstructured( + name: str, + version: str, + expected_model_fmt: str, + fmt_present_in_all: bool, + settings: Settings, + caplog, +): + settings.use_structured_logging = False + caplog.handler.setFormatter(ModelLoggerFormatter(settings)) + caplog.set_level(INFO) + + model_settings = ModelSettings( + name=name, implementation=SumModel, parameters=ModelParameters(version=version) + ) + + logger.info("Before model context") + with model_context(model_settings): + logger.info("Inside model context") + logger.info("After model context") + + log_records = caplog.get_records("call") + assert len(log_records) == 3 + + assert all(hasattr(lr, "model") for lr in log_records) + + if fmt_present_in_all: + assert all(lr.model == expected_model_fmt for lr in log_records) + else: + assert expected_model_fmt != log_records[0].model + assert expected_model_fmt == log_records[1].model + assert expected_model_fmt != log_records[2].model + + +@pytest.mark.parametrize( + "name, version, expected_model_fmt, fmt_present_in_all", + [ + ( + "foo", + "v1.0", + ', "model_name": "foo", "model_version": "v1.0"', + False, ), ( "foo", - None, - "[foo]", + "", + ', "model_name": "foo"', + False, ), + ( + "", + "v1.0", + "", + True, + ), + ("", "", "", True), ], ) -def test_model_logging_formatter(name, version, expected_fmt, caplog): - caplog.handler.setFormatter(ModelLoggerFormatter(LoggerFormat)) +def test_model_logging_formatter_structured( + name: str, + version: str, + expected_model_fmt: str, + fmt_present_in_all: bool, + settings: Settings, + caplog, +): + settings.use_structured_logging = True + caplog.handler.setFormatter(ModelLoggerFormatter(settings)) caplog.set_level(INFO) model_settings = ModelSettings( @@ -46,12 +123,18 @@ def test_model_logging_formatter(name, version, expected_fmt, caplog): logger.info("Inside model context") logger.info("After model context") - log_records = caplog.text.strip().split("\n") - assert len(caplog.messages) == 3 + _ = [json.loads(lr) for lr in caplog.text.strip().split("\n")] + log_records = caplog.get_records("call") + assert len(log_records) == 3 + + assert all(hasattr(lr, "model") for lr in log_records) - assert expected_fmt not in log_records[0] - assert expected_fmt in log_records[1] - assert expected_fmt not in log_records[2] + if fmt_present_in_all: + assert all(lr.model == expected_model_fmt for lr in log_records) + else: + assert expected_model_fmt != log_records[0].model + assert expected_model_fmt == log_records[1].model + assert expected_model_fmt != log_records[2].model @pytest.mark.parametrize("debug", [True, False]) @@ -66,3 +149,36 @@ def test_log_level_gets_persisted(debug: bool, settings: Settings, caplog): assert test_log_message in caplog.text else: assert test_log_message not in caplog.text + + +def test_configure_logger_when_called_multiple_times_with_same_logger(settings): + logger = configure_logger() + + assert len(logger.handlers) == 1 + handler = logger.handlers[0] + assert handler.name == _STREAM_HANDLER_NAME + assert ( + hasattr(handler.formatter, "use_structured_logging") + and handler.formatter.use_structured_logging is False + ) + + logger = configure_logger(settings) + + assert len(logger.handlers) == 1 + handler = logger.handlers[0] + assert handler.name == _STREAM_HANDLER_NAME + assert ( + hasattr(handler.formatter, "use_structured_logging") + and handler.formatter.use_structured_logging is False + ) + + settings.use_structured_logging = True + logger = configure_logger(settings) + + assert len(logger.handlers) == 1 + handler = logger.handlers[0] + assert handler.name == _STREAM_HANDLER_NAME + assert ( + hasattr(handler.formatter, "use_structured_logging") + and handler.formatter.use_structured_logging is True + )