Skip to content

Commit

Permalink
removes json-logger
Browse files Browse the repository at this point in the history
  • Loading branch information
rudolfix committed Sep 21, 2023
1 parent 0be1002 commit 7173485
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 31 deletions.
179 changes: 179 additions & 0 deletions dlt/common/runtime/json_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@

import logging
from datetime import datetime # noqa: I251
import traceback
from logging import Logger
from typing import Any, List, Type

from dlt.common.json import json
from dlt.common.typing import DictStrAny, StrAny

EMPTY_VALUE = '-'
JSON_SERIALIZER = lambda log: json.dumps(log)
COMPONENT_ID = EMPTY_VALUE
COMPONENT_NAME = EMPTY_VALUE
COMPONENT_INSTANCE_INDEX = 0

# The list contains all the attributes listed in
# http://docs.python.org/library/logging.html#logrecord-attributes
RECORD_ATTR_SKIP_LIST = [
'asctime', 'created', 'exc_info', 'exc_text', 'filename', 'args',
'funcName', 'id', 'levelname', 'levelno', 'lineno', 'module', 'msg',
'msecs', 'msecs', 'message', 'name', 'pathname', 'process',
'processName', 'relativeCreated', 'thread', 'threadName', 'extra',
# Also exclude legacy 'props'
'props',
]

RECORD_ATTR_SKIP_LIST.append('stack_info')
EASY_TYPES = (str, bool, dict, float, int, list, type(None))

_default_formatter: Type[logging.Formatter] = None
_epoch = datetime(1970, 1, 1)


def config_root_logger() -> None:
"""
You must call this if you are using root logger.
Make all root logger' handlers produce JSON format
& remove duplicate handlers for request instrumentation logging.
Please made sure that you call this after you called "logging.basicConfig() or logging.getLogger()
"""
global _default_formatter
update_formatter_for_loggers([logging.root], _default_formatter)


def init(custom_formatter: Type[logging.Formatter] = None) -> None:
"""
This is supposed to be called only one time.
If **custom_formatter** is passed, it will (in non-web context) use this formatter over the default.
"""

global _default_formatter

if custom_formatter:
if not issubclass(custom_formatter, logging.Formatter):
raise ValueError('custom_formatter is not subclass of logging.Formatter', custom_formatter)

_default_formatter = custom_formatter if custom_formatter else JSONLogFormatter
logging._defaultFormatter = _default_formatter() # type: ignore

# go to all the initialized logger and update it to use JSON formatter
existing_loggers = list(map(logging.getLogger, logging.Logger.manager.loggerDict))
update_formatter_for_loggers(existing_loggers, _default_formatter)


class BaseJSONFormatter(logging.Formatter):
"""
Base class for JSON formatters
"""
base_object_common: DictStrAny = {}

def __init__(self, *args: Any, **kw: Any) -> None:
super(BaseJSONFormatter, self).__init__(*args, **kw)
if COMPONENT_ID and COMPONENT_ID != EMPTY_VALUE:
self.base_object_common["component_id"] = COMPONENT_ID
if COMPONENT_NAME and COMPONENT_NAME != EMPTY_VALUE:
self.base_object_common["component_name"] = COMPONENT_NAME
if COMPONENT_INSTANCE_INDEX and COMPONENT_INSTANCE_INDEX != EMPTY_VALUE:
self.base_object_common["component_instance_idx"] = COMPONENT_INSTANCE_INDEX

def format(self, record: logging.LogRecord) -> str: # noqa
log_object = self._format_log_object(record)
return JSON_SERIALIZER(log_object)

def _format_log_object(self, record: logging.LogRecord) -> DictStrAny:
utcnow = datetime.utcnow()
base_obj = {
"written_at": iso_time_format(utcnow),
"written_ts": epoch_nano_second(utcnow),
}
base_obj.update(self.base_object_common)
# Add extra fields
base_obj.update(self._get_extra_fields(record))
return base_obj

def _get_extra_fields(self, record: logging.LogRecord) -> StrAny:
fields: DictStrAny = {}

if record.args:
fields['msg'] = record.msg

for key, value in record.__dict__.items():
if key not in RECORD_ATTR_SKIP_LIST:
if isinstance(value, EASY_TYPES):
fields[key] = value
else:
fields[key] = repr(value)

# Always add 'props' to the root of the log, assumes props is a dict
if hasattr(record, 'props') and isinstance(record.props, dict):
fields.update(record.props)

return fields



def _sanitize_log_msg(record: logging.LogRecord) -> str:
return record.getMessage().replace('\n', '_').replace('\r', '_').replace('\t', '_')


class JSONLogFormatter(BaseJSONFormatter):
"""
Formatter for non-web application log
"""

def get_exc_fields(self, record: logging.LogRecord) -> StrAny:
if record.exc_info:
exc_info = self.format_exception(record.exc_info)
else:
exc_info = record.exc_text
return {
'exc_info': exc_info,
'filename': record.filename,
}

@classmethod
def format_exception(cls, exc_info: Any) -> str:
return ''.join(traceback.format_exception(*exc_info)) if exc_info else ''

def _format_log_object(self, record: logging.LogRecord) -> DictStrAny:
json_log_object = super(JSONLogFormatter, self)._format_log_object(record)
json_log_object.update({
"msg": _sanitize_log_msg(record),
"type": "log",
"logger": record.name,
"thread": record.threadName,
"level": record.levelname,
"module": record.module,
"line_no": record.lineno,
})

if record.exc_info or record.exc_text:
json_log_object.update(self.get_exc_fields(record))

return json_log_object


def update_formatter_for_loggers(loggers_iter: List[Logger], formatter: Type[logging.Formatter]) -> None:
"""
:param formatter:
:param loggers_iter:
"""
for logger in loggers_iter:
if not isinstance(logger, Logger):
raise RuntimeError("%s is not a logging.Logger instance", logger)
for handler in logger.handlers:
if not isinstance(handler.formatter, formatter):
handler.formatter = formatter()


def epoch_nano_second(datetime_: datetime) -> int:
return int((datetime_ - _epoch).total_seconds()) * 1000000000 + datetime_.microsecond * 1000


def iso_time_format(datetime_: datetime) -> str:
return '%04d-%02d-%02dT%02d:%02d:%02d.%03dZ' % (
datetime_.year, datetime_.month, datetime_.day, datetime_.hour, datetime_.minute, datetime_.second,
int(datetime_.microsecond / 1000))
26 changes: 12 additions & 14 deletions dlt/common/runtime/logger.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import contextlib
import logging
import json_logging
import traceback
from logging import LogRecord, Logger
from typing import Any, Iterator, Protocol
Expand Down Expand Up @@ -85,17 +84,6 @@ def format(self, record: LogRecord) -> str: # noqa: A003
return s


class _CustomJsonFormatter(json_logging.JSONLogFormatter):

version: StrStr = None

def _format_log_object(self, record: LogRecord, request_util: Any) -> Any:
json_log_object = super(_CustomJsonFormatter, self)._format_log_object(record, request_util)
if self.version:
json_log_object.update({"version": self.version})
return json_log_object


def _init_logging(logger_name: str, level: str, fmt: str, component: str, version: StrStr) -> Logger:
if logger_name == "root":
logging.basicConfig(level=level)
Expand All @@ -111,14 +99,24 @@ def _init_logging(logger_name: str, level: str, fmt: str, component: str, versio

# set right formatter
if is_json_logging(fmt):
from dlt.common.runtime import json_logging

class _CustomJsonFormatter(json_logging.JSONLogFormatter):
version: StrStr = None

def _format_log_object(self, record: LogRecord) -> Any:
json_log_object = super(_CustomJsonFormatter, self)._format_log_object(record)
if self.version:
json_log_object.update({"version": self.version})
return json_log_object

json_logging.COMPONENT_NAME = component
json_logging.JSON_SERIALIZER = json.dumps
if "process" in json_logging.RECORD_ATTR_SKIP_LIST:
json_logging.RECORD_ATTR_SKIP_LIST.remove("process")
# set version as class variable as we cannot pass custom constructor parameters
_CustomJsonFormatter.version = version
# the only thing method above effectively does is to replace the formatter
json_logging.init_non_web(enable_json=True, custom_formatter=_CustomJsonFormatter)
json_logging.init(custom_formatter=_CustomJsonFormatter)
if logger_name == "root":
json_logging.config_root_logger()
else:
Expand Down
14 changes: 1 addition & 13 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ requests = ">=2.26.0"
pendulum = ">=2.1.2"
simplejson = ">=3.17.5"
PyYAML = ">=5.4.1"
json-logging = "1.4.1rc0"
semver = ">=2.13.0"
hexbytes = ">=0.2.2"
tzdata = ">=2022.1"
Expand Down
6 changes: 3 additions & 3 deletions tests/common/runtime/test_logging.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import pytest
import json_logging
from importlib.metadata import version as pkg_version

from dlt.common import logger
Expand Down Expand Up @@ -70,11 +69,13 @@ def test_text_logger_init(environment: StrStr) -> None:

@pytest.mark.forked
def test_json_logger_init(environment: StrStr) -> None:
from dlt.common.runtime import json_logging

mock_image_env(environment)
mock_pod_env(environment)
init_test_logging(JsonLoggerConfiguration())
# correct component was set
json_logging.COMPONENT_NAME = "logger"
assert json_logging.COMPONENT_NAME == "logger"
logger.metrics("test health", extra={"metrics": "props"})
logger.metrics("test", extra={"metrics": "props"})
logger.warning("Warning message here")
Expand Down Expand Up @@ -103,7 +104,6 @@ def test_double_log_init(environment: StrStr) -> None:
# normal logger
# to json
init_test_logging(JsonLoggerConfiguration())
json_logging.COMPONENT_NAME = "logger"
logger.error("test json warning", extra={"metrics": "props"})
# to regular
init_test_logging(PureBasicConfiguration())
Expand Down

0 comments on commit 7173485

Please sign in to comment.