Skip to content

Commit eb4a28b

Browse files
committed
[SPARK-51274][PYTHON] PySparkLogger should respect the expected keyword arguments
### What changes were proposed in this pull request? `PySparkLogger` should respect the expected keyword arguments. Also, `debug`, `warn`, `critical`, `fatal`, and `log` are added to have proper docs. ### Why are the changes needed? Currently, all keyword arguments passed to `PySparkLogger` methods are placed in the `context`, but the logger should respect the keyword arguments that the standard logging API expects, like `exc_info`, `stack_info`, etc. ### Does this PR introduce _any_ user-facing change? Yes, the logging methods of `PySparkLogger` will respect the expected arguments. - before: ```py >>> from pyspark.logger.logger import PySparkLogger >>> logger = PySparkLogger.getLogger("TestLogger") >>> >>> logger.warning("This is an info log", exc_info=True, user="test_user_info", action="test_action_info") {"ts": "2025-02-21 10:46:53,786", "level": "WARNING", "logger": "TestLogger", "msg": "This is an info log", "context": {"exc_info": true, "user": "test_user_info", "action": "test_action_info"}} ``` - after: ```py >>> logger.warning("This is an info log", exc_info=True, user="test_user_info", action="test_action_info") {"ts": "2025-02-21 10:47:36,351", "level": "WARNING", "logger": "TestLogger", "msg": "This is an info log", "context": {"user": "test_user_info", "action": "test_action_info"}, "exception": {"class": "UnknownException", "msg": "None", "stacktrace": ["NoneType: None"]}} ``` ### How was this patch tested? Added the related tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes apache#50032 from ueshin/issues/SPARK-51274/logger. Authored-by: Takuya Ueshin <[email protected]> Signed-off-by: Takuya Ueshin <[email protected]>
1 parent e1842c7 commit eb4a28b

File tree

3 files changed

+123
-11
lines changed

3 files changed

+123
-11
lines changed

dev/sparktestsupport/modules.py

+2
Original file line numberDiff line numberDiff line change
@@ -1471,6 +1471,8 @@ def __hash__(self):
14711471
dependencies=[],
14721472
source_file_regexes=["python/pyspark/logger"],
14731473
python_test_goals=[
1474+
# doctests
1475+
"pyspark.logger.logger",
14741476
# unittests
14751477
"pyspark.logger.tests.test_logger",
14761478
"pyspark.logger.tests.connect.test_parity_logger",

python/pyspark/logger/logger.py

+105-11
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@
1818

1919
import logging
2020
import json
21-
from typing import cast, Optional
21+
import sys
22+
from typing import cast, Mapping, Optional, TYPE_CHECKING
23+
24+
if TYPE_CHECKING:
25+
from logging import _ArgsType, _ExcInfoType
2226

2327
SPARK_LOG_SCHEMA = (
2428
"ts TIMESTAMP, "
@@ -99,7 +103,7 @@ class PySparkLogger(logging.Logger):
99103
100104
>>> logger.info(
101105
... "This is an informational message",
102-
... extra={"user": "test_user", "action": "test_action"}
106+
... user="test_user", action="test_action"
103107
... )
104108
>>> log_output = stream.getvalue().strip().split('\\n')[0]
105109
>>> log = json.loads(log_output)
@@ -111,10 +115,8 @@ class PySparkLogger(logging.Logger):
111115
"logger": "ExampleLogger",
112116
"msg": "This is an informational message",
113117
"context": {
114-
"extra": {
115-
"user": "test_user",
116-
"action": "test_action"
117-
}
118+
"user": "test_user",
119+
"action": "test_action"
118120
}
119121
}
120122
"""
@@ -158,6 +160,17 @@ def getLogger(name: Optional[str] = None) -> "PySparkLogger":
158160

159161
return cast(PySparkLogger, pyspark_logger)
160162

163+
def debug(self, msg: object, *args: object, **kwargs: object) -> None:
164+
"""
165+
Log 'msg % args' with severity 'DEBUG' in structured JSON format.
166+
167+
Parameters
168+
----------
169+
msg : str
170+
The log message.
171+
"""
172+
super().debug(msg, *args, **kwargs) # type: ignore[arg-type]
173+
161174
def info(self, msg: object, *args: object, **kwargs: object) -> None:
162175
"""
163176
Log 'msg % args' with severity 'INFO' in structured JSON format.
@@ -167,7 +180,7 @@ def info(self, msg: object, *args: object, **kwargs: object) -> None:
167180
msg : str
168181
The log message.
169182
"""
170-
super().info(msg, *args, extra={"kwargs": kwargs})
183+
super().info(msg, *args, **kwargs) # type: ignore[arg-type]
171184

172185
def warning(self, msg: object, *args: object, **kwargs: object) -> None:
173186
"""
@@ -178,7 +191,20 @@ def warning(self, msg: object, *args: object, **kwargs: object) -> None:
178191
msg : str
179192
The log message.
180193
"""
181-
super().warning(msg, *args, extra={"kwargs": kwargs})
194+
super().warning(msg, *args, **kwargs) # type: ignore[arg-type]
195+
196+
if sys.version_info < (3, 13):
197+
198+
def warn(self, msg: object, *args: object, **kwargs: object) -> None:
199+
"""
200+
Log 'msg % args' with severity 'WARN' in structured JSON format.
201+
202+
Parameters
203+
----------
204+
msg : str
205+
The log message.
206+
"""
207+
super().warn(msg, *args, **kwargs) # type: ignore[arg-type]
182208

183209
def error(self, msg: object, *args: object, **kwargs: object) -> None:
184210
"""
@@ -189,9 +215,11 @@ def error(self, msg: object, *args: object, **kwargs: object) -> None:
189215
msg : str
190216
The log message.
191217
"""
192-
super().error(msg, *args, extra={"kwargs": kwargs})
218+
super().error(msg, *args, **kwargs) # type: ignore[arg-type]
193219

194-
def exception(self, msg: object, *args: object, **kwargs: object) -> None:
220+
def exception(
221+
self, msg: object, *args: object, exc_info: "_ExcInfoType" = True, **kwargs: object
222+
) -> None:
195223
"""
196224
Convenience method for logging an ERROR with exception information.
197225
@@ -203,4 +231,70 @@ def exception(self, msg: object, *args: object, **kwargs: object) -> None:
203231
If True, exception information is added to the logging message.
204232
This includes the exception type, value, and traceback. Default is True.
205233
"""
206-
super().error(msg, *args, exc_info=True, extra={"kwargs": kwargs})
234+
super().exception(msg, *args, exc_info=exc_info, **kwargs) # type: ignore[arg-type]
235+
236+
def critical(self, msg: object, *args: object, **kwargs: object) -> None:
237+
"""
238+
Log 'msg % args' with severity 'CRITICAL' in structured JSON format.
239+
240+
Parameters
241+
----------
242+
msg : str
243+
The log message.
244+
"""
245+
super().critical(msg, *args, **kwargs) # type: ignore[arg-type]
246+
247+
def log(self, level: int, msg: object, *args: object, **kwargs: object) -> None:
248+
"""
249+
Log 'msg % args' with the given severity in structured JSON format.
250+
251+
Parameters
252+
----------
253+
level : int
254+
The log level.
255+
msg : str
256+
The log message.
257+
"""
258+
super().log(level, msg, *args, **kwargs) # type: ignore[arg-type]
259+
260+
fatal = critical
261+
262+
def _log(
263+
self,
264+
level: int,
265+
msg: object,
266+
args: "_ArgsType",
267+
exc_info: Optional["_ExcInfoType"] = None,
268+
extra: Optional[Mapping[str, object]] = None,
269+
stack_info: bool = False,
270+
stacklevel: int = 1,
271+
**kwargs: object,
272+
) -> None:
273+
if extra is not None:
274+
kwargs["extra"] = extra
275+
super()._log(
276+
level=level,
277+
msg=msg,
278+
args=args,
279+
exc_info=exc_info,
280+
extra={"kwargs": kwargs},
281+
stack_info=stack_info,
282+
stacklevel=stacklevel,
283+
)
284+
285+
286+
def _test() -> None:
287+
import doctest
288+
import pyspark.logger.logger
289+
290+
globs = pyspark.logger.logger.__dict__.copy()
291+
(failure_count, test_count) = doctest.testmod(
292+
pyspark.logger.logger, globs=globs, optionflags=doctest.ELLIPSIS
293+
)
294+
295+
if failure_count:
296+
sys.exit(-1)
297+
298+
299+
if __name__ == "__main__":
300+
_test()

python/pyspark/logger/tests/test_logger.py

+16
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,22 @@ def test_log_info(self):
5353
)
5454
self.assertTrue("exception" not in log_json)
5555

56+
def test_log_info_with_exception(self):
57+
# SPARK-51274: PySparkLogger should respect the expected keyword arguments
58+
self.logger.info(
59+
"This is an info log", exc_info=True, user="test_user_info", action="test_action_info"
60+
)
61+
log_json = json.loads(self.handler.stream.getvalue().strip())
62+
63+
self.assertEqual(log_json["msg"], "This is an info log")
64+
self.assertEqual(
65+
log_json["context"], {"action": "test_action_info", "user": "test_user_info"}
66+
)
67+
self.assertTrue("exception" in log_json)
68+
self.assertTrue("class" in log_json["exception"])
69+
self.assertTrue("msg" in log_json["exception"])
70+
self.assertTrue("stacktrace" in log_json["exception"])
71+
5672
def test_log_warn(self):
5773
self.logger.warn("This is an warn log", user="test_user_warn", action="test_action_warn")
5874
log_json = json.loads(self.handler.stream.getvalue().strip())

0 commit comments

Comments
 (0)