From 71734856a00080545ef010af1b0062eb4838798c Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 21 Sep 2023 16:00:59 +0200 Subject: [PATCH] removes json-logger --- dlt/common/runtime/json_logging.py | 179 +++++++++++++++++++++++++++ dlt/common/runtime/logger.py | 26 ++-- poetry.lock | 14 +-- pyproject.toml | 1 - tests/common/runtime/test_logging.py | 6 +- 5 files changed, 195 insertions(+), 31 deletions(-) create mode 100644 dlt/common/runtime/json_logging.py diff --git a/dlt/common/runtime/json_logging.py b/dlt/common/runtime/json_logging.py new file mode 100644 index 0000000000..bfff063dab --- /dev/null +++ b/dlt/common/runtime/json_logging.py @@ -0,0 +1,179 @@ + +import logging +from datetime import datetime # noqa: I251 +import traceback +from logging import Logger +from typing import Any, List, Type + +from dlt.common.json import json +from dlt.common.typing import DictStrAny, StrAny + +EMPTY_VALUE = '-' +JSON_SERIALIZER = lambda log: json.dumps(log) +COMPONENT_ID = EMPTY_VALUE +COMPONENT_NAME = EMPTY_VALUE +COMPONENT_INSTANCE_INDEX = 0 + +# The list contains all the attributes listed in +# http://docs.python.org/library/logging.html#logrecord-attributes +RECORD_ATTR_SKIP_LIST = [ + 'asctime', 'created', 'exc_info', 'exc_text', 'filename', 'args', + 'funcName', 'id', 'levelname', 'levelno', 'lineno', 'module', 'msg', + 'msecs', 'msecs', 'message', 'name', 'pathname', 'process', + 'processName', 'relativeCreated', 'thread', 'threadName', 'extra', + # Also exclude legacy 'props' + 'props', +] + +RECORD_ATTR_SKIP_LIST.append('stack_info') +EASY_TYPES = (str, bool, dict, float, int, list, type(None)) + +_default_formatter: Type[logging.Formatter] = None +_epoch = datetime(1970, 1, 1) + + +def config_root_logger() -> None: + """ + You must call this if you are using root logger. + Make all root logger' handlers produce JSON format + & remove duplicate handlers for request instrumentation logging. + Please made sure that you call this after you called "logging.basicConfig() or logging.getLogger() + """ + global _default_formatter + update_formatter_for_loggers([logging.root], _default_formatter) + + +def init(custom_formatter: Type[logging.Formatter] = None) -> None: + """ + This is supposed to be called only one time. + + If **custom_formatter** is passed, it will (in non-web context) use this formatter over the default. + """ + + global _default_formatter + + if custom_formatter: + if not issubclass(custom_formatter, logging.Formatter): + raise ValueError('custom_formatter is not subclass of logging.Formatter', custom_formatter) + + _default_formatter = custom_formatter if custom_formatter else JSONLogFormatter + logging._defaultFormatter = _default_formatter() # type: ignore + + # go to all the initialized logger and update it to use JSON formatter + existing_loggers = list(map(logging.getLogger, logging.Logger.manager.loggerDict)) + update_formatter_for_loggers(existing_loggers, _default_formatter) + + +class BaseJSONFormatter(logging.Formatter): + """ + Base class for JSON formatters + """ + base_object_common: DictStrAny = {} + + def __init__(self, *args: Any, **kw: Any) -> None: + super(BaseJSONFormatter, self).__init__(*args, **kw) + if COMPONENT_ID and COMPONENT_ID != EMPTY_VALUE: + self.base_object_common["component_id"] = COMPONENT_ID + if COMPONENT_NAME and COMPONENT_NAME != EMPTY_VALUE: + self.base_object_common["component_name"] = COMPONENT_NAME + if COMPONENT_INSTANCE_INDEX and COMPONENT_INSTANCE_INDEX != EMPTY_VALUE: + self.base_object_common["component_instance_idx"] = COMPONENT_INSTANCE_INDEX + + def format(self, record: logging.LogRecord) -> str: # noqa + log_object = self._format_log_object(record) + return JSON_SERIALIZER(log_object) + + def _format_log_object(self, record: logging.LogRecord) -> DictStrAny: + utcnow = datetime.utcnow() + base_obj = { + "written_at": iso_time_format(utcnow), + "written_ts": epoch_nano_second(utcnow), + } + base_obj.update(self.base_object_common) + # Add extra fields + base_obj.update(self._get_extra_fields(record)) + return base_obj + + def _get_extra_fields(self, record: logging.LogRecord) -> StrAny: + fields: DictStrAny = {} + + if record.args: + fields['msg'] = record.msg + + for key, value in record.__dict__.items(): + if key not in RECORD_ATTR_SKIP_LIST: + if isinstance(value, EASY_TYPES): + fields[key] = value + else: + fields[key] = repr(value) + + # Always add 'props' to the root of the log, assumes props is a dict + if hasattr(record, 'props') and isinstance(record.props, dict): + fields.update(record.props) + + return fields + + + +def _sanitize_log_msg(record: logging.LogRecord) -> str: + return record.getMessage().replace('\n', '_').replace('\r', '_').replace('\t', '_') + + +class JSONLogFormatter(BaseJSONFormatter): + """ + Formatter for non-web application log + """ + + def get_exc_fields(self, record: logging.LogRecord) -> StrAny: + if record.exc_info: + exc_info = self.format_exception(record.exc_info) + else: + exc_info = record.exc_text + return { + 'exc_info': exc_info, + 'filename': record.filename, + } + + @classmethod + def format_exception(cls, exc_info: Any) -> str: + return ''.join(traceback.format_exception(*exc_info)) if exc_info else '' + + def _format_log_object(self, record: logging.LogRecord) -> DictStrAny: + json_log_object = super(JSONLogFormatter, self)._format_log_object(record) + json_log_object.update({ + "msg": _sanitize_log_msg(record), + "type": "log", + "logger": record.name, + "thread": record.threadName, + "level": record.levelname, + "module": record.module, + "line_no": record.lineno, + }) + + if record.exc_info or record.exc_text: + json_log_object.update(self.get_exc_fields(record)) + + return json_log_object + + +def update_formatter_for_loggers(loggers_iter: List[Logger], formatter: Type[logging.Formatter]) -> None: + """ + :param formatter: + :param loggers_iter: + """ + for logger in loggers_iter: + if not isinstance(logger, Logger): + raise RuntimeError("%s is not a logging.Logger instance", logger) + for handler in logger.handlers: + if not isinstance(handler.formatter, formatter): + handler.formatter = formatter() + + +def epoch_nano_second(datetime_: datetime) -> int: + return int((datetime_ - _epoch).total_seconds()) * 1000000000 + datetime_.microsecond * 1000 + + +def iso_time_format(datetime_: datetime) -> str: + return '%04d-%02d-%02dT%02d:%02d:%02d.%03dZ' % ( + datetime_.year, datetime_.month, datetime_.day, datetime_.hour, datetime_.minute, datetime_.second, + int(datetime_.microsecond / 1000)) diff --git a/dlt/common/runtime/logger.py b/dlt/common/runtime/logger.py index 9fbd00ef22..f833d36608 100644 --- a/dlt/common/runtime/logger.py +++ b/dlt/common/runtime/logger.py @@ -1,6 +1,5 @@ import contextlib import logging -import json_logging import traceback from logging import LogRecord, Logger from typing import Any, Iterator, Protocol @@ -85,17 +84,6 @@ def format(self, record: LogRecord) -> str: # noqa: A003 return s -class _CustomJsonFormatter(json_logging.JSONLogFormatter): - - version: StrStr = None - - def _format_log_object(self, record: LogRecord, request_util: Any) -> Any: - json_log_object = super(_CustomJsonFormatter, self)._format_log_object(record, request_util) - if self.version: - json_log_object.update({"version": self.version}) - return json_log_object - - def _init_logging(logger_name: str, level: str, fmt: str, component: str, version: StrStr) -> Logger: if logger_name == "root": logging.basicConfig(level=level) @@ -111,14 +99,24 @@ def _init_logging(logger_name: str, level: str, fmt: str, component: str, versio # set right formatter if is_json_logging(fmt): + from dlt.common.runtime import json_logging + + class _CustomJsonFormatter(json_logging.JSONLogFormatter): + version: StrStr = None + + def _format_log_object(self, record: LogRecord) -> Any: + json_log_object = super(_CustomJsonFormatter, self)._format_log_object(record) + if self.version: + json_log_object.update({"version": self.version}) + return json_log_object + json_logging.COMPONENT_NAME = component - json_logging.JSON_SERIALIZER = json.dumps if "process" in json_logging.RECORD_ATTR_SKIP_LIST: json_logging.RECORD_ATTR_SKIP_LIST.remove("process") # set version as class variable as we cannot pass custom constructor parameters _CustomJsonFormatter.version = version # the only thing method above effectively does is to replace the formatter - json_logging.init_non_web(enable_json=True, custom_formatter=_CustomJsonFormatter) + json_logging.init(custom_formatter=_CustomJsonFormatter) if logger_name == "root": json_logging.config_root_logger() else: diff --git a/poetry.lock b/poetry.lock index 18698ff8ad..0a6acebb96 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2599,14 +2599,6 @@ category = "main" optional = true python-versions = ">=3.7" -[[package]] -name = "json-logging" -version = "1.4.1rc0" -description = "JSON Python Logging" -category = "main" -optional = false -python-versions = "*" - [[package]] name = "jsonpath-ng" version = "1.5.3" @@ -4837,7 +4829,7 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "1.1" python-versions = ">=3.8.1,<4.0" -content-hash = "020804442c6a1e8b4754549db382156691f3733a18026b47e0c1e95dedb1c74e" +content-hash = "3fae4453f619e90083e19168c598e3e1d111617d426f7b6929ff6b6b7ce2a5f8" [metadata.files] about-time = [ @@ -6025,10 +6017,6 @@ jmespath = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] -json-logging = [ - {file = "json-logging-1.4.1rc0.tar.gz", hash = "sha256:381e00495bbd619d09c8c3d1fdd72c843f7045797ab63b42cfec5f7961e5b3f6"}, - {file = "json_logging-1.4.1rc0-py2.py3-none-any.whl", hash = "sha256:2b787c28f31fb4d8aabac16ac3816326031d92dd054bdabc9bbe68eb10864f77"}, -] jsonpath-ng = [ {file = "jsonpath-ng-1.5.3.tar.gz", hash = "sha256:a273b182a82c1256daab86a313b937059261b5c5f8c4fa3fc38b882b344dd567"}, {file = "jsonpath_ng-1.5.3-py2-none-any.whl", hash = "sha256:f75b95dbecb8a0f3b86fd2ead21c2b022c3f5770957492b9b6196ecccfeb10aa"}, diff --git a/pyproject.toml b/pyproject.toml index b9414935e3..4dfb1f82b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,6 @@ requests = ">=2.26.0" pendulum = ">=2.1.2" simplejson = ">=3.17.5" PyYAML = ">=5.4.1" -json-logging = "1.4.1rc0" semver = ">=2.13.0" hexbytes = ">=0.2.2" tzdata = ">=2022.1" diff --git a/tests/common/runtime/test_logging.py b/tests/common/runtime/test_logging.py index c7385351e8..35d50531a8 100644 --- a/tests/common/runtime/test_logging.py +++ b/tests/common/runtime/test_logging.py @@ -1,5 +1,4 @@ import pytest -import json_logging from importlib.metadata import version as pkg_version from dlt.common import logger @@ -70,11 +69,13 @@ def test_text_logger_init(environment: StrStr) -> None: @pytest.mark.forked def test_json_logger_init(environment: StrStr) -> None: + from dlt.common.runtime import json_logging + mock_image_env(environment) mock_pod_env(environment) init_test_logging(JsonLoggerConfiguration()) # correct component was set - json_logging.COMPONENT_NAME = "logger" + assert json_logging.COMPONENT_NAME == "logger" logger.metrics("test health", extra={"metrics": "props"}) logger.metrics("test", extra={"metrics": "props"}) logger.warning("Warning message here") @@ -103,7 +104,6 @@ def test_double_log_init(environment: StrStr) -> None: # normal logger # to json init_test_logging(JsonLoggerConfiguration()) - json_logging.COMPONENT_NAME = "logger" logger.error("test json warning", extra={"metrics": "props"}) # to regular init_test_logging(PureBasicConfiguration())